<a href="https://www.kaggle.com/code/ismetsemedov/rsna-screening-mammography-breast-cancer-detection?scriptVersionId=205968748" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import pandas as pd
import pydicom
import cv2
from pathlib import Path
from sklearn.model_selection import GroupKFold
import albumentations as A
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import os

class RSNAPreprocessor:
    def __init__(self, **kwargs):
        """
        Initialize the preprocessor
        
        Args:
            base_path (str): Base path to the RSNA dataset
            target_size (tuple): Target size for resizing images
            output_format (str): Format to save processed images
        """
        self.base_path = Path(kwargs.get('base_path', "/kaggle/input/rsna-breast-cancer-detection"))
        self.target_size = kwargs.get('target_size', (2048, 2048))
        self.output_format = kwargs.get('output_format', 'png').lower()
        
        self.train_images_path = self.base_path / "train_images"
        self.test_images_path = self.base_path / "test_images"
        
        if self.output_format not in ['png', 'jpg', 'jpeg']:
            raise ValueError("output_format must be 'png' or 'jpg'/'jpeg'")

    def get_dicom_path(self, patient_id, image_id, is_train=True):
        """
        Get the path to a DICOM file
        """
        images_path = self.train_images_path if is_train else self.test_images_path
        return images_path / str(patient_id) / f"{image_id}"

    def read_dicom(self, patient_id, image_id, is_train=True):
        """
        Read and preprocess DICOM image
        """
        dicom_path = self.get_dicom_path(patient_id, image_id, is_train)
        try:
            # Add .dcm extension if not in the image_id
            if not str(dicom_path).endswith('.dcm'):
                dicom_path = Path(str(dicom_path) + '.dcm')

            print(f"Reading DICOM from: {dicom_path}")  # Debug print
            
            if not dicom_path.exists():
                print(f"File not found: {dicom_path}")
                return None
                
            dicom = pydicom.dcmread(dicom_path)
            
            # Process image
            img = dicom.pixel_array
            
            # Convert to float and normalize
            img = img.astype(float)
            if img.max() != img.min():
                img = (img - img.min()) / (img.max() - img.min())
            
            # Scale to 0-255 range
            img = (img * 255).astype(np.uint8)
            
            # Apply CLAHE for better contrast
            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
            img = clahe.apply(img)
            
            # Resize while maintaining aspect ratio
            aspect = img.shape[0] / img.shape[1]
            if aspect > 1:
                new_height = self.target_size[0]
                new_width = int(new_height / aspect)
            else:
                new_width = self.target_size[1]
                new_height = int(new_width * aspect)
            
            img = cv2.resize(img, (new_width, new_height))
            
            # Add padding to reach target size
            top_pad = (self.target_size[0] - img.shape[0]) // 2
            bottom_pad = self.target_size[0] - img.shape[0] - top_pad
            left_pad = (self.target_size[1] - img.shape[1]) // 2
            right_pad = self.target_size[1] - img.shape[1] - left_pad
            
            img = cv2.copyMakeBorder(
                img, top_pad, bottom_pad, left_pad, right_pad,
                cv2.BORDER_CONSTANT, value=0
            )
            
            return img

        except Exception as e:
            print(f"Error processing image {image_id} for patient {patient_id}: {str(e)}")
            return None

    def save_image(self, img, output_path):
        """
        Save processed image in specified format
        """
        if img is not None:
            if self.output_format == 'png':
                cv2.imwrite(str(output_path.with_suffix('.png')), img)
            else:  # jpg/jpeg
                cv2.imwrite(str(output_path.with_suffix('.jpg')), img, [cv2.IMWRITE_JPEG_QUALITY, 100])

    def process_and_save(self, metadata_df, output_dir, num_samples=None):
        """
        Process images and save them in the specified format
        """
        if num_samples:
            metadata_df = metadata_df.head(num_samples)
        
        # Create output directory structure
        output_dir = Path(output_dir)
        output_dir.mkdir(exist_ok=True)
        
        # Create subdirectories for different views
        for view in ['CC', 'MLO']:
            (output_dir / view).mkdir(exist_ok=True)
            (output_dir / view / 'L').mkdir(exist_ok=True)
            (output_dir / view / 'R').mkdir(exist_ok=True)
        
        processed_count = 0
        failed_count = 0
        
        print("\nProcessing metadata shape:", metadata_df.shape)
        print("Sample row:")
        print(metadata_df.iloc[0])
        
        for idx, row in tqdm(metadata_df.iterrows(), total=len(metadata_df)):
            try:
                img = self.read_dicom(
                    patient_id=str(row['patient_id']),
                    image_id=str(row['image_id'])
                )
                
                if img is not None:
                    # Create organized directory structure based on view and laterality
                    view = row['view']      # CC or MLO
                    laterality = row['laterality']  # L or R
                    
                    # Define output path with organized structure
                    output_path = output_dir / view / laterality / f"{row['patient_id']}_{row['image_id']}"
                    
                    # Save the image
                    self.save_image(img, output_path)
                    processed_count += 1
                    
                    # Save a thumbnail for quick viewing
                    thumbnail = cv2.resize(img, (512, 512))
                    thumbnail_path = output_path.with_name(f"{output_path.stem}_thumb")
                    self.save_image(thumbnail, thumbnail_path)
                    
                else:
                    failed_count += 1
                    
            except Exception as e:
                failed_count += 1
                print(f"Error processing row {idx}: {str(e)}")
                continue
                
        return processed_count, failed_count

def main():
    print("Initializing RSNA Mammography Preprocessing...")
    
    # Initialize preprocessor with named parameters
    preprocessor = RSNAPreprocessor(
        base_path="/kaggle/input/rsna-breast-cancer-detection",
        target_size=(2048, 2048),
        output_format='png'
    )
    
    try:
        # Read metadata
        train_df = pd.read_csv("/kaggle/input/rsna-breast-cancer-detection/train.csv")
        print(f"Total images to process: {len(train_df)}")
        
        # Create output directory
        output_dir = Path("/kaggle/working/processed_images")
        
        # Process images
        print("\nProcessing images...")
        processed_count, failed_count = preprocessor.process_and_save(
            train_df,
            output_dir,
            num_samples=5
        )
        
        print(f"\nProcessing completed:")
        print(f"Successfully processed: {processed_count}")
        print(f"Failed: {failed_count}")
        
        # Save processing summary
        summary = {
            'total_images': len(train_df),
            'processed': processed_count,
            'failed': failed_count,
            'success_rate': processed_count / (processed_count + failed_count) * 100 if (processed_count + failed_count) > 0 else 0
        }
        
        pd.DataFrame([summary]).to_csv(output_dir / 'processing_summary.csv', index=False)
        print("\nProcessing summary saved.")
        
        print("\nOutput directory structure:")
        print(f"{output_dir}/")
        print("├── CC/")
        print("│   ├── L/")
        print("│   └── R/")
        print("├── MLO/")
        print("│   ├── L/")
        print("│   └── R/")
        print("└── processing_summary.csv")
        
    except Exception as e:
        print(f"\nAn error occurred: {str(e)}")
        import traceback
        print(traceback.format_exc())

if __name__ == "__main__":
    main()

  check_for_updates()


Initializing RSNA Mammography Preprocessing...
Total images to process: 54706

Processing images...

Processing metadata shape: (5, 14)
Sample row:
site_id                            2
patient_id                     10006
image_id                   462822612
laterality                         L
view                              CC
age                             61.0
cancer                             0
biopsy                             0
invasive                           0
BIRADS                           NaN
implant                            0
density                          NaN
machine_id                        29
difficult_negative_case        False
Name: 0, dtype: object


  0%|          | 0/5 [00:00<?, ?it/s]

Reading DICOM from: /kaggle/input/rsna-breast-cancer-detection/train_images/10006/462822612.dcm
Reading DICOM from: /kaggle/input/rsna-breast-cancer-detection/train_images/10006/1459541791.dcm
Reading DICOM from: /kaggle/input/rsna-breast-cancer-detection/train_images/10006/1864590858.dcm
Reading DICOM from: /kaggle/input/rsna-breast-cancer-detection/train_images/10006/1874946579.dcm
Reading DICOM from: /kaggle/input/rsna-breast-cancer-detection/train_images/10011/220375232.dcm

Processing completed:
Successfully processed: 5
Failed: 0

Processing summary saved.

Output directory structure:
/kaggle/working/processed_images/
├── CC/
│   ├── L/
│   └── R/
├── MLO/
│   ├── L/
│   └── R/
└── processing_summary.csv


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import cv2
import numpy as np

def analyze_processed_images(base_dir="/kaggle/working/processed_images"):
    base_dir = Path(base_dir)
    
    # Initialize counters
    image_stats = {
        'CC': {'L': 0, 'R': 0},
        'MLO': {'L': 0, 'R': 0}
    }
    
    # Collect image information
    image_info = []
    
    # Traverse directory structure
    for view in ['CC', 'MLO']:
        for laterality in ['L', 'R']:
            path = base_dir / view / laterality
            if path.exists():
                files = list(path.glob('*.png'))  # Non-thumbnail images
                image_stats[view][laterality] = len(files)
                
                # Analyze sample images
                for img_path in files:
                    if not img_path.stem.endswith('thumb'):  # Skip thumbnails
                        img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                        if img is not None:
                            image_info.append({
                                'path': str(img_path),
                                'view': view,
                                'laterality': laterality,
                                'size': img.shape,
                                'mean_intensity': img.mean(),
                                'std_intensity': img.std()
                            })
    
    # Create summary visualizations
    plt.figure(figsize=(15, 5))
    
    # Plot 1: Distribution of views
    plt.subplot(1, 3, 1)
    data = [[image_stats['CC']['L'], image_stats['CC']['R']],
            [image_stats['MLO']['L'], image_stats['MLO']['R']]]
    sns.heatmap(data, annot=True, fmt='d', 
                xticklabels=['Left', 'Right'],
                yticklabels=['CC', 'MLO'])
    plt.title('Distribution of Views')
    
    # Plot 2: Mean Intensities
    if image_info:
        plt.subplot(1, 3, 2)
        intensities = [info['mean_intensity'] for info in image_info]
        plt.hist(intensities, bins=20)
        plt.title('Mean Image Intensities')
        plt.xlabel('Intensity')
        plt.ylabel('Count')
    
    # Plot 3: Image Sizes
    if image_info:
        plt.subplot(1, 3, 3)
        heights = [info['size'][0] for info in image_info]
        widths = [info['size'][1] for info in image_info]
        plt.scatter(widths, heights)
        plt.title('Image Dimensions')
        plt.xlabel('Width')
        plt.ylabel('Height')
    
    plt.tight_layout()
    plt.savefig(base_dir / 'analysis_summary.png')
    plt.close()
    
    # Print summary
    print("\nImage Distribution Summary:")
    print("---------------------------")
    for view in ['CC', 'MLO']:
        for laterality in ['L', 'R']:
            count = image_stats[view][laterality]
            print(f"{view}-{laterality}: {count} images")
    
    if image_info:
        print("\nImage Statistics:")
        print("----------------")
        df = pd.DataFrame(image_info)
        print(df.groupby(['view', 'laterality']).agg({
            'mean_intensity': ['mean', 'std'],
            'size': lambda x: list(set(x))
        }).round(2))
    
    return image_stats, image_info

# Run the analysis
if __name__ == "__main__":
    image_stats, image_info = analyze_processed_images()


Image Distribution Summary:
---------------------------
CC-L: 4 images
CC-R: 2 images
MLO-L: 2 images
MLO-R: 2 images

Image Statistics:
----------------
                mean_intensity                    size
                          mean     std        <lambda>
view laterality                                       
CC   L                  121.51  139.84  [(2048, 2048)]
     R                  222.65     NaN  [(2048, 2048)]
MLO  L                  211.56     NaN  [(2048, 2048)]
     R                  212.78     NaN  [(2048, 2048)]


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
import cv2

def visualize_mammogram_analysis(base_dir="/kaggle/working/processed_images"):
    base_dir = Path(base_dir)
    
    # Create figure with subplots
    plt.figure(figsize=(20, 10))
    
    # 1. View Distribution Bar Plot
    plt.subplot(2, 2, 1)
    views_data = {
        'CC-L': 4, 'CC-R': 2,
        'MLO-L': 2, 'MLO-R': 2
    }
    plt.bar(views_data.keys(), views_data.values())
    plt.title('Distribution of Mammogram Views')
    plt.ylabel('Number of Images')
    plt.xticks(rotation=45)
    
    # 2. Mean Intensity Comparison
    plt.subplot(2, 2, 2)
    intensities = {
        'CC-L': 121.51,
        'CC-R': 222.65,
        'MLO-L': 211.56,
        'MLO-R': 212.78
    }
    plt.bar(intensities.keys(), intensities.values(), color='skyblue')
    plt.title('Mean Intensity by View')
    plt.ylabel('Mean Intensity')
    plt.xticks(rotation=45)
    
    # 3. Left vs Right Comparison
    plt.subplot(2, 2, 3)
    left_right = {
        'Left (CC)': 4,
        'Right (CC)': 2,
        'Left (MLO)': 2,
        'Right (MLO)': 2
    }
    colors = ['lightcoral', 'lightcoral', 'lightskyblue', 'lightskyblue']
    plt.bar(left_right.keys(), left_right.values(), color=colors)
    plt.title('Left vs Right Distribution')
    plt.ylabel('Number of Images')
    plt.xticks(rotation=45)
    
    # 4. Intensity Variation
    plt.subplot(2, 2, 4)
    std_data = {
        'CC-L': 139.84,
        'CC-R': 0,  # NaN in original data
        'MLO-L': 0, # NaN in original data
        'MLO-R': 0  # NaN in original data
    }
    plt.bar(std_data.keys(), std_data.values(), color='lightgreen')
    plt.title('Intensity Variation (Standard Deviation)')
    plt.ylabel('Standard Deviation')
    plt.xticks(rotation=45)
    
    plt.tight_layout()
    plt.savefig(base_dir / 'detailed_analysis.png')
    plt.close()

def print_detailed_analysis():
    print("\nDetailed Analysis:")
    print("-----------------")
    print("1. View Distribution:")
    print("   - CC Views dominate with 6 images (4 left, 2 right)")
    print("   - MLO Views are balanced with 4 images (2 left, 2 right)")
    
    print("\n2. Intensity Patterns:")
    print("   - CC Left shows lower intensity (121.51) with high variation")
    print("   - Other views show similar intensities (210-223)")
    print("   - Right views are consistently brighter than left views")
    
    print("\n3. Image Standardization:")
    print("   - All images successfully standardized to 2048x2048")
    print("   - Consistent resolution maintains image quality")
    
    print("\n4. Potential Considerations:")
    print("   - Uneven distribution between left and right CC views")
    print("   - High variation in CC Left might need investigation")
    print("   - MLO views show more consistent intensities")

# Run the visualization and analysis
if __name__ == "__main__":
    visualize_mammogram_analysis()
    print_detailed_analysis()


Detailed Analysis:
-----------------
1. View Distribution:
   - CC Views dominate with 6 images (4 left, 2 right)
   - MLO Views are balanced with 4 images (2 left, 2 right)

2. Intensity Patterns:
   - CC Left shows lower intensity (121.51) with high variation
   - Other views show similar intensities (210-223)
   - Right views are consistently brighter than left views

3. Image Standardization:
   - All images successfully standardized to 2048x2048
   - Consistent resolution maintains image quality

4. Potential Considerations:
   - Uneven distribution between left and right CC views
   - High variation in CC Left might need investigation
   - MLO views show more consistent intensities


In [4]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

def analyze_image_characteristics(base_dir="/kaggle/working/processed_images"):
    print("\nDetailed Image Analysis:")
    print("------------------------")
    
    base_dir = Path(base_dir)  # Convert string to Path object
    
    def analyze_view(view, laterality):
        view_path = base_dir / view / laterality
        characteristics = []
        
        if view_path.exists():
            for img_path in view_path.glob('*.png'):
                if not str(img_path).endswith('thumb.png'):
                    try:
                        img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                        if img is not None:
                            chars = {
                                'mean': img.mean(),
                                'std': img.std(),
                                'min': img.min(),
                                'max': img.max(),
                                'contrast': img.max() - img.min(),
                                'filename': img_path.name
                            }
                            characteristics.append(chars)
                    except Exception as e:
                        print(f"Error processing {img_path}: {str(e)}")
        
        return characteristics

    views = ['CC', 'MLO']
    lateralities = ['L', 'R']
    
    all_results = {}
    
    for view in views:
        print(f"\n{view} View Analysis:")
        view_results = {}
        
        for lat in lateralities:
            chars = analyze_view(view, lat)
            if chars:
                print(f"\n{lat} Side ({len(chars)} images):")
                for char in chars:
                    print(f"\nImage: {char['filename']}")
                    print(f"  Average Intensity: {char['mean']:.2f}")
                    print(f"  Standard Deviation: {char['std']:.2f}")
                    print(f"  Contrast Range: {char['min']} - {char['max']} ({char['contrast']})")
                
                # Store results for later use
                view_results[lat] = chars
        
        all_results[view] = view_results
    
    # Additional Analysis
    print("\nComparative Analysis:")
    print("--------------------")
    
    for view in views:
        print(f"\n{view} View:")
        if view in all_results and 'L' in all_results[view] and 'R' in all_results[view]:
            left_intensities = [c['mean'] for c in all_results[view]['L']]
            right_intensities = [c['mean'] for c in all_results[view]['R']]
            
            print(f"Left Side Average: {np.mean(left_intensities):.2f}")
            print(f"Right Side Average: {np.mean(right_intensities):.2f}")
            
            if len(left_intensities) > 0 and len(right_intensities) > 0:
                intensity_diff = abs(np.mean(left_intensities) - np.mean(right_intensities))
                print(f"Intensity Difference: {intensity_diff:.2f}")

def visualize_intensity_distributions(base_dir="/kaggle/working/processed_images"):
    base_dir = Path(base_dir)
    
    plt.figure(figsize=(15, 10))
    
    def get_image_intensities(view, laterality):
        intensities = []
        view_path = base_dir / view / laterality
        if view_path.exists():
            for img_path in view_path.glob('*.png'):
                if not str(img_path).endswith('thumb.png'):
                    img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                    if img is not None:
                        intensities.append(img.mean())
        return intensities

    # Plot intensity distributions
    views = ['CC', 'MLO']
    lateralities = ['L', 'R']
    
    for idx, view in enumerate(views):
        plt.subplot(1, 2, idx+1)
        
        for lat in lateralities:
            intensities = get_image_intensities(view, lat)
            if intensities:
                plt.hist(intensities, alpha=0.5, label=f'{lat} Side', bins=20)
        
        plt.title(f'{view} View Intensity Distribution')
        plt.xlabel('Mean Intensity')
        plt.ylabel('Frequency')
        plt.legend()
    
    plt.tight_layout()
    plt.savefig(base_dir / 'intensity_distribution.png')
    plt.close()

if __name__ == "__main__":
    print("Starting mammogram analysis...")
    analyze_image_characteristics()
    print("\nGenerating intensity distribution plots...")
    visualize_intensity_distributions()
    print("Analysis complete!")

Starting mammogram analysis...

Detailed Image Analysis:
------------------------

CC View Analysis:

L Side (2 images):

Image: 10011_220375232.png
  Average Intensity: 22.63
  Standard Deviation: 47.31
  Contrast Range: 0 - 255 (255)

Image: 10006_462822612.png
  Average Intensity: 220.39
  Standard Deviation: 78.21
  Contrast Range: 0 - 255 (255)

R Side (1 images):

Image: 10006_1874946579.png
  Average Intensity: 222.65
  Standard Deviation: 76.77
  Contrast Range: 0 - 255 (255)

MLO View Analysis:

L Side (1 images):

Image: 10006_1459541791.png
  Average Intensity: 211.56
  Standard Deviation: 82.50
  Contrast Range: 0 - 255 (255)

R Side (1 images):

Image: 10006_1864590858.png
  Average Intensity: 212.78
  Standard Deviation: 82.06
  Contrast Range: 0 - 255 (255)

Comparative Analysis:
--------------------

CC View:
Left Side Average: 121.51
Right Side Average: 222.65
Intensity Difference: 101.14

MLO View:
Left Side Average: 211.56
Right Side Average: 212.78
Intensity Differe

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path

def create_detailed_visualization(data):
    plt.figure(figsize=(20, 15))
    
    # 1. Intensity Comparison
    plt.subplot(2, 2, 1)
    intensities = {
        'CC-L (10011)': 22.63,
        'CC-L (10006)': 220.39,
        'CC-R': 222.65,
        'MLO-L': 211.56,
        'MLO-R': 212.78
    }
    colors = ['red', 'blue', 'blue', 'green', 'green']
    plt.bar(intensities.keys(), intensities.values(), color=colors)
    plt.axhline(y=np.mean([v for v in intensities.values() if v > 200]), 
                color='black', linestyle='--', label='Normal Range Mean')
    plt.title('Average Intensities Across Views')
    plt.xticks(rotation=45)
    plt.ylabel('Mean Intensity')
    plt.legend()

    # 2. Standard Deviation Comparison
    plt.subplot(2, 2, 2)
    std_dev = {
        'CC-L (10011)': 47.31,
        'CC-L (10006)': 78.21,
        'CC-R': 76.77,
        'MLO-L': 82.50,
        'MLO-R': 82.06
    }
    plt.bar(std_dev.keys(), std_dev.values(), color=colors)
    plt.title('Standard Deviation Comparison')
    plt.xticks(rotation=45)
    plt.ylabel('Standard Deviation')

    # 3. Left-Right Comparison
    plt.subplot(2, 2, 3)
    differences = {
        'CC Views': 101.14,
        'MLO Views': 1.22
    }
    plt.bar(differences.keys(), differences.values(), color=['red', 'green'])
    plt.title('Left-Right Intensity Differences')
    plt.ylabel('Intensity Difference')

    # 4. Normal Range Analysis
    plt.subplot(2, 2, 4)
    normal_values = [v for v in intensities.values() if v > 200]
    normal_mean = np.mean(normal_values)
    normal_std = np.std(normal_values)
    
    plt.axvspan(normal_mean - 2*normal_std, normal_mean + 2*normal_std, 
                color='green', alpha=0.2, label='Normal Range')
    for name, value in intensities.items():
        color = 'red' if value < 100 else 'blue'
        plt.scatter(value, 1, c=color, s=100, label=name)
    plt.title('Intensity Distribution vs Normal Range')
    plt.xlabel('Intensity')
    plt.yticks([])

    plt.tight_layout()
    return plt

def print_clinical_analysis():
    print("\nClinical Analysis Summary:")
    print("--------------------------")
    print("1. Image Quality Concerns:")
    print("   - One CC Left image (10011) shows abnormally low intensity (22.63)")
    print("   - Other images show consistent normal range (210-223)")
    
    print("\n2. View Consistency:")
    print("   - MLO views show excellent consistency (difference: 1.22)")
    print("   - CC views show high variation due to one outlier")
    print("   - Normal images have consistent std dev (76-82)")
    
    print("\n3. Technical Recommendations:")
    print("   - Investigate low-intensity CC Left image")
    print("   - Consider standardizing exposure settings")
    print("   - Monitor left-right consistency in CC views")
    
    print("\n4. Clinical Implications:")
    print("   - MLO views suitable for direct comparison")
    print("   - CC views need careful consideration due to intensity variation")
    print("   - Consider retaking abnormal intensity image")

if __name__ == "__main__":
    # Create and save visualization
    plt = create_detailed_visualization({})
    plt.savefig('/kaggle/working/processed_images/detailed_analysis.png')
    plt.close()
    
    # Print analysis
    print_clinical_analysis()


Clinical Analysis Summary:
--------------------------
1. Image Quality Concerns:
   - One CC Left image (10011) shows abnormally low intensity (22.63)
   - Other images show consistent normal range (210-223)

2. View Consistency:
   - MLO views show excellent consistency (difference: 1.22)
   - CC views show high variation due to one outlier
   - Normal images have consistent std dev (76-82)

3. Technical Recommendations:
   - Investigate low-intensity CC Left image
   - Consider standardizing exposure settings
   - Monitor left-right consistency in CC views

4. Clinical Implications:
   - MLO views suitable for direct comparison
   - CC views need careful consideration due to intensity variation
   - Consider retaking abnormal intensity image


In [6]:
import numpy as np
import pandas as pd
import cv2
from pathlib import Path
from typing import Dict, List, Tuple
import matplotlib.pyplot as plt

class MammogramQualityAssessment:
    def __init__(self):
        # Define quality thresholds based on the analysis
        self.quality_thresholds = {
            'intensity': {
                'min': 180,  # Minimum acceptable mean intensity
                'max': 240,  # Maximum acceptable mean intensity
                'std_dev_range': (70, 90)  # Acceptable std dev range
            },
            'symmetry': {
                'max_intensity_diff': 20,  # Maximum acceptable L/R difference
                'max_std_diff': 10  # Maximum acceptable L/R std dev difference
            }
        }
    
    def assess_image_quality(self, img: np.ndarray) -> Dict:
        """
        Assess individual image quality
        """
        quality_metrics = {
            'mean_intensity': img.mean(),
            'std_dev': img.std(),
            'contrast': img.max() - img.min(),
            'histogram_uniformity': cv2.calcHist([img], [0], None, [256], [0, 256]).std(),
            'quality_score': 0
        }
        
        # Calculate quality score (0-100)
        intensity_score = self._calculate_intensity_score(quality_metrics['mean_intensity'])
        std_score = self._calculate_std_score(quality_metrics['std_dev'])
        quality_metrics['quality_score'] = (intensity_score + std_score) / 2
        
        return quality_metrics
    
    def _calculate_intensity_score(self, intensity: float) -> float:
        """Calculate score based on intensity"""
        if intensity < self.quality_thresholds['intensity']['min']:
            return max(0, (intensity / self.quality_thresholds['intensity']['min']) * 100)
        elif intensity > self.quality_thresholds['intensity']['max']:
            return max(0, (self.quality_thresholds['intensity']['max'] / intensity) * 100)
        return 100

    def _calculate_std_score(self, std_dev: float) -> float:
        """Calculate score based on standard deviation"""
        min_std, max_std = self.quality_thresholds['intensity']['std_dev_range']
        if std_dev < min_std:
            return max(0, (std_dev / min_std) * 100)
        elif std_dev > max_std:
            return max(0, (max_std / std_dev) * 100)
        return 100

    def assess_study_quality(self, base_dir: str) -> Dict:
        """
        Assess quality of entire mammography study
        """
        base_dir = Path(base_dir)
        study_assessment = {
            'images': {},
            'symmetry': {},
            'recommendations': []
        }
        
        # Assess each view
        for view in ['CC', 'MLO']:
            view_metrics = {'L': [], 'R': []}
            
            for laterality in ['L', 'R']:
                view_path = base_dir / view / laterality
                if view_path.exists():
                    for img_path in view_path.glob('*.png'):
                        if not str(img_path).endswith('thumb.png'):
                            img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                            if img is not None:
                                metrics = self.assess_image_quality(img)
                                view_metrics[laterality].append({
                                    'file': img_path.name,
                                    **metrics
                                })
            
            # Assess symmetry
            if view_metrics['L'] and view_metrics['R']:
                left_intensities = [m['mean_intensity'] for m in view_metrics['L']]
                right_intensities = [m['mean_intensity'] for m in view_metrics['R']]
                
                intensity_diff = abs(np.mean(left_intensities) - np.mean(right_intensities))
                study_assessment['symmetry'][view] = {
                    'intensity_difference': intensity_diff,
                    'is_symmetric': intensity_diff <= self.quality_thresholds['symmetry']['max_intensity_diff']
                }
            
            study_assessment['images'][view] = view_metrics
        
        # Generate recommendations
        self._generate_recommendations(study_assessment)
        
        return study_assessment
    
    def _generate_recommendations(self, assessment: Dict):
        """Generate specific recommendations based on assessment"""
        for view in ['CC', 'MLO']:
            for laterality in ['L', 'R']:
                for img_data in assessment['images'][view].get(laterality, []):
                    if img_data['quality_score'] < 70:
                        assessment['recommendations'].append({
                            'priority': 'HIGH',
                            'issue': f"Low quality score ({img_data['quality_score']:.1f}) in {view}-{laterality} image: {img_data['file']}",
                            'action': "Consider retaking image"
                        })
                    elif img_data['mean_intensity'] < self.quality_thresholds['intensity']['min']:
                        assessment['recommendations'].append({
                            'priority': 'MEDIUM',
                            'issue': f"Low intensity ({img_data['mean_intensity']:.1f}) in {view}-{laterality} image: {img_data['file']}",
                            'action': "Check exposure settings"
                        })
            
            if view in assessment['symmetry']:
                if not assessment['symmetry'][view]['is_symmetric']:
                    assessment['recommendations'].append({
                        'priority': 'HIGH',
                        'issue': f"Asymmetric {view} views (difference: {assessment['symmetry'][view]['intensity_difference']:.1f})",
                        'action': "Review positioning and exposure settings"
                    })

def generate_quality_report(base_dir: str = "/kaggle/working/processed_images"):
    """Generate comprehensive quality report"""
    qa = MammogramQualityAssessment()
    assessment = qa.assess_study_quality(base_dir)
    
    print("\nMammogram Quality Assessment Report")
    print("==================================")
    
    print("\nImage Quality Scores:")
    print("-------------------")
    for view in ['CC', 'MLO']:
        print(f"\n{view} View:")
        for laterality in ['L', 'R']:
            for img_data in assessment['images'][view].get(laterality, []):
                print(f"\n{laterality} Side - {img_data['file']}:")
                print(f"  Quality Score: {img_data['quality_score']:.1f}/100")
                print(f"  Mean Intensity: {img_data['mean_intensity']:.1f}")
                print(f"  Standard Deviation: {img_data['std_dev']:.1f}")
    
    print("\nSymmetry Analysis:")
    print("-----------------")
    for view, symmetry in assessment['symmetry'].items():
        print(f"\n{view} View:")
        print(f"  Intensity Difference: {symmetry['intensity_difference']:.1f}")
        print(f"  Status: {'ACCEPTABLE' if symmetry['is_symmetric'] else 'NEEDS REVIEW'}")
    
    print("\nRecommendations:")
    print("---------------")
    for rec in assessment['recommendations']:
        print(f"\n[{rec['priority']}] {rec['issue']}")
        print(f"Action: {rec['action']}")

if __name__ == "__main__":
    generate_quality_report()


Mammogram Quality Assessment Report

Image Quality Scores:
-------------------

CC View:

L Side - 10011_220375232.png:
  Quality Score: 40.1/100
  Mean Intensity: 22.6
  Standard Deviation: 47.3

L Side - 10006_462822612.png:
  Quality Score: 100.0/100
  Mean Intensity: 220.4
  Standard Deviation: 78.2

R Side - 10006_1874946579.png:
  Quality Score: 100.0/100
  Mean Intensity: 222.7
  Standard Deviation: 76.8

MLO View:

L Side - 10006_1459541791.png:
  Quality Score: 100.0/100
  Mean Intensity: 211.6
  Standard Deviation: 82.5

R Side - 10006_1864590858.png:
  Quality Score: 100.0/100
  Mean Intensity: 212.8
  Standard Deviation: 82.1

Symmetry Analysis:
-----------------

CC View:
  Intensity Difference: 101.1
  Status: NEEDS REVIEW

MLO View:
  Intensity Difference: 1.2
  Status: ACCEPTABLE

Recommendations:
---------------

[HIGH] Low quality score (40.1) in CC-L image: 10011_220375232.png
Action: Consider retaking image

[HIGH] Asymmetric CC views (difference: 101.1)
Action: Re

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path

class MammogramQualityVisualizer:
    def __init__(self):
        self.quality_data = {
            'CC': {
                'L': [
                    {'file': '10011_220375232.png', 'score': 40.1, 'intensity': 22.6, 'std': 47.3},
                    {'file': '10006_462822612.png', 'score': 100.0, 'intensity': 220.4, 'std': 78.2}
                ],
                'R': [
                    {'file': '10006_1874946579.png', 'score': 100.0, 'intensity': 222.7, 'std': 76.8}
                ]
            },
            'MLO': {
                'L': [
                    {'file': '10006_1459541791.png', 'score': 100.0, 'intensity': 211.6, 'std': 82.5}
                ],
                'R': [
                    {'file': '10006_1864590858.png', 'score': 100.0, 'intensity': 212.8, 'std': 82.1}
                ]
            }
        }
        
    def create_quality_dashboard(self, output_path):
        plt.figure(figsize=(20, 15))
        
        # 1. Quality Scores Comparison
        plt.subplot(2, 2, 1)
        self._plot_quality_scores()
        
        # 2. Intensity Distribution
        plt.subplot(2, 2, 2)
        self._plot_intensity_distribution()
        
        # 3. Symmetry Analysis
        plt.subplot(2, 2, 3)
        self._plot_symmetry_analysis()
        
        # 4. Standard Deviation Analysis
        plt.subplot(2, 2, 4)
        self._plot_std_analysis()
        
        plt.tight_layout()
        plt.savefig(output_path)
        plt.close()
    
    def _plot_quality_scores(self):
        labels = []
        scores = []
        colors = []
        
        for view in ['CC', 'MLO']:
            for side in ['L', 'R']:
                for img in self.quality_data[view][side]:
                    labels.append(f"{view}-{side}\n{img['file'].split('_')[0]}")
                    scores.append(img['score'])
                    colors.append('red' if img['score'] < 70 else 'green')
        
        plt.bar(labels, scores, color=colors)
        plt.axhline(y=70, color='r', linestyle='--', label='Minimum Acceptable Score')
        plt.title('Quality Scores by Image')
        plt.ylabel('Quality Score')
        plt.xticks(rotation=45)
        plt.ylim(0, 105)
        plt.legend()

    def _plot_intensity_distribution(self):
        normal_intensities = []
        abnormal_intensities = []
        labels = []
        
        for view in ['CC', 'MLO']:
            for side in ['L', 'R']:
                for img in self.quality_data[view][side]:
                    if img['score'] >= 70:
                        normal_intensities.append(img['intensity'])
                    else:
                        abnormal_intensities.append(img['intensity'])
                    labels.append(f"{view}-{side}")
        
        plt.boxplot([normal_intensities], positions=[0], labels=['Normal'])
        if abnormal_intensities:
            plt.boxplot([abnormal_intensities], positions=[1], labels=['Abnormal'])
        
        plt.title('Intensity Distribution')
        plt.ylabel('Mean Intensity')
        
    def _plot_symmetry_analysis(self):
        views = ['CC', 'MLO']
        differences = [101.1, 1.2]  # From the analysis
        colors = ['red', 'green']
        
        plt.bar(views, differences, color=colors)
        plt.axhline(y=20, color='r', linestyle='--', label='Maximum Acceptable Difference')
        plt.title('Left-Right Intensity Differences')
        plt.ylabel('Intensity Difference')
        plt.legend()

    def _plot_std_analysis(self):
        labels = []
        stds = []
        colors = []
        
        for view in ['CC', 'MLO']:
            for side in ['L', 'R']:
                for img in self.quality_data[view][side]:
                    labels.append(f"{view}-{side}\n{img['file'].split('_')[0]}")
                    stds.append(img['std'])
                    colors.append('red' if img['std'] < 70 else 'green')
        
        plt.bar(labels, stds, color=colors)
        plt.axhline(y=70, color='r', linestyle='--', label='Minimum Expected STD')
        plt.axhline(y=90, color='r', linestyle='--', label='Maximum Expected STD')
        plt.title('Standard Deviation Analysis')
        plt.ylabel('Standard Deviation')
        plt.xticks(rotation=45)
        plt.legend()

def generate_summary_report():
    print("\nMammogram Quality Summary Report")
    print("================================")
    
    print("\nKey Findings:")
    print("1. Critical Issues:")
    print("   - One CC Left image (10011) shows severely low quality (Score: 40.1/100)")
    print("   - CC view shows significant left-right asymmetry (Difference: 101.1)")
    
    print("\n2. Acceptable Quality:")
    print("   - MLO views show excellent symmetry (Difference: 1.2)")
    print("   - Four out of five images meet quality standards (Score: 100/100)")
    
    print("\n3. Technical Parameters:")
    print("   - Normal intensity range: 210-223")
    print("   - Standard deviation range: 76-83 (acceptable)")
    print("   - Outlier intensity: 22.6 (requires investigation)")
    
    print("\nRecommended Actions:")
    print("1. Immediate:")
    print("   - Retake CC Left image for patient 10011")
    print("   - Check exposure settings for CC Left views")
    
    print("\n2. Quality Improvement:")
    print("   - Review CC positioning protocol")
    print("   - Implement exposure standardization")
    print("   - Consider equipment calibration check")

if __name__ == "__main__":
    visualizer = MammogramQualityVisualizer()
    visualizer.create_quality_dashboard("/kaggle/working/processed_images/quality_dashboard.png")
    generate_summary_report()


Mammogram Quality Summary Report

Key Findings:
1. Critical Issues:
   - One CC Left image (10011) shows severely low quality (Score: 40.1/100)
   - CC view shows significant left-right asymmetry (Difference: 101.1)

2. Acceptable Quality:
   - MLO views show excellent symmetry (Difference: 1.2)
   - Four out of five images meet quality standards (Score: 100/100)

3. Technical Parameters:
   - Normal intensity range: 210-223
   - Standard deviation range: 76-83 (acceptable)
   - Outlier intensity: 22.6 (requires investigation)

Recommended Actions:
1. Immediate:
   - Retake CC Left image for patient 10011
   - Check exposure settings for CC Left views

2. Quality Improvement:
   - Review CC positioning protocol
   - Implement exposure standardization
   - Consider equipment calibration check


In [8]:
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import cv2
import json
from typing import Dict, List, Optional

class MammogramQualityMonitor:
    def __init__(self, log_dir: str = "/kaggle/working/quality_logs"):
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(exist_ok=True)
        
        # Quality thresholds based on analysis
        self.thresholds = {
            'intensity': {
                'normal_range': (200, 230),
                'warning_range': (150, 250),
                'critical_range': (20, 300)
            },
            'std_dev': {
                'normal_range': (75, 85),
                'warning_range': (70, 90),
                'critical_range': (40, 100)
            },
            'symmetry': {
                'acceptable': 20,
                'warning': 50,
                'critical': 100
            }
        }

    def check_image_quality(self, img: np.ndarray, metadata: Dict) -> Dict:
        """
        Real-time quality check for single image
        """
        mean_intensity = img.mean()
        std_dev = img.std()
        
        # Determine quality status
        quality_status = self._get_quality_status(mean_intensity, std_dev)
        
        return {
            'timestamp': datetime.now().isoformat(),
            'patient_id': metadata.get('patient_id'),
            'image_id': metadata.get('image_id'),
            'view': metadata.get('view'),
            'laterality': metadata.get('laterality'),
            'metrics': {
                'mean_intensity': mean_intensity,
                'std_dev': std_dev,
                'quality_status': quality_status,
                'warnings': self._generate_warnings(mean_intensity, std_dev)
            }
        }

    def _get_quality_status(self, intensity: float, std_dev: float) -> str:
        """
        Determine quality status based on metrics
        """
        if (self.thresholds['intensity']['normal_range'][0] <= intensity <= 
            self.thresholds['intensity']['normal_range'][1] and
            self.thresholds['std_dev']['normal_range'][0] <= std_dev <= 
            self.thresholds['std_dev']['normal_range'][1]):
            return 'OPTIMAL'
            
        elif (self.thresholds['intensity']['warning_range'][0] <= intensity <= 
              self.thresholds['intensity']['warning_range'][1] and
              self.thresholds['std_dev']['warning_range'][0] <= std_dev <= 
              self.thresholds['std_dev']['warning_range'][1]):
            return 'WARNING'
            
        return 'CRITICAL'

    def _generate_warnings(self, intensity: float, std_dev: float) -> List[str]:
        """
        Generate specific warnings based on metrics
        """
        warnings = []
        
        if intensity < self.thresholds['intensity']['warning_range'][0]:
            warnings.append(f"Low intensity: {intensity:.1f}")
        elif intensity > self.thresholds['intensity']['warning_range'][1]:
            warnings.append(f"High intensity: {intensity:.1f}")
            
        if std_dev < self.thresholds['std_dev']['warning_range'][0]:
            warnings.append(f"Low contrast: {std_dev:.1f}")
        elif std_dev > self.thresholds['std_dev']['warning_range'][1]:
            warnings.append(f"High contrast: {std_dev:.1f}")
            
        return warnings

    def process_study(self, study_dir: str) -> Dict:
        """
        Process entire study and generate report
        """
        study_dir = Path(study_dir)
        study_results = {
            'timestamp': datetime.now().isoformat(),
            'images': [],
            'symmetry': {},
            'overall_quality': 'OPTIMAL'
        }
        
        # Process each view
        for view in ['CC', 'MLO']:
            view_intensities = {'L': [], 'R': []}
            
            for laterality in ['L', 'R']:
                view_path = study_dir / view / laterality
                if view_path.exists():
                    for img_path in view_path.glob('*.png'):
                        if not str(img_path).endswith('thumb.png'):
                            img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                            if img is not None:
                                metadata = {
                                    'patient_id': img_path.stem.split('_')[0],
                                    'image_id': img_path.stem.split('_')[1],
                                    'view': view,
                                    'laterality': laterality
                                }
                                
                                result = self.check_image_quality(img, metadata)
                                study_results['images'].append(result)
                                
                                if result['metrics']['quality_status'] == 'CRITICAL':
                                    study_results['overall_quality'] = 'CRITICAL'
                                elif (result['metrics']['quality_status'] == 'WARNING' and 
                                      study_results['overall_quality'] == 'OPTIMAL'):
                                    study_results['overall_quality'] = 'WARNING'
                                    
                                view_intensities[laterality].append(
                                    result['metrics']['mean_intensity']
                                )
            
            # Check symmetry
            if view_intensities['L'] and view_intensities['R']:
                left_mean = np.mean(view_intensities['L'])
                right_mean = np.mean(view_intensities['R'])
                difference = abs(left_mean - right_mean)
                
                symmetry_status = 'OPTIMAL'
                if difference > self.thresholds['symmetry']['critical']:
                    symmetry_status = 'CRITICAL'
                elif difference > self.thresholds['symmetry']['warning']:
                    symmetry_status = 'WARNING'
                
                study_results['symmetry'][view] = {
                    'difference': difference,
                    'status': symmetry_status
                }
        
        # Save results
        self._save_results(study_results)
        
        return study_results

    def _save_results(self, results: Dict):
        """
        Save quality check results
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        file_path = self.log_dir / f"quality_check_{timestamp}.json"
        
        with open(file_path, 'w') as f:
            json.dump(results, f, indent=2)

    def generate_report(self, results: Dict):
        """
        Generate human-readable report
        """
        print("\nMammogram Quality Monitor Report")
        print("===============================")
        
        print(f"\nOverall Study Quality: {results['overall_quality']}")
        
        print("\nImage Quality Summary:")
        for img in results['images']:
            print(f"\n{img['view']}-{img['laterality']} "
                  f"(Patient: {img['patient_id']}):")
            print(f"  Status: {img['metrics']['quality_status']}")
            if img['metrics']['warnings']:
                print("  Warnings:")
                for warning in img['metrics']['warnings']:
                    print(f"    - {warning}")
        
        print("\nSymmetry Analysis:")
        for view, symmetry in results['symmetry'].items():
            print(f"\n{view} View:")
            print(f"  Difference: {symmetry['difference']:.1f}")
            print(f"  Status: {symmetry['status']}")

def run_quality_monitor(study_dir: str = "/kaggle/working/processed_images"):
    monitor = MammogramQualityMonitor()
    results = monitor.process_study(study_dir)
    monitor.generate_report(results)

if __name__ == "__main__":
    run_quality_monitor()


Mammogram Quality Monitor Report

Overall Study Quality: CRITICAL

Image Quality Summary:

CC-L (Patient: 10011):
  Status: CRITICAL
    - Low intensity: 22.6
    - Low contrast: 47.3

CC-L (Patient: 10006):
  Status: OPTIMAL

CC-R (Patient: 10006):
  Status: OPTIMAL

MLO-L (Patient: 10006):
  Status: OPTIMAL

MLO-R (Patient: 10006):
  Status: OPTIMAL

Symmetry Analysis:

CC View:
  Difference: 101.1
  Status: CRITICAL

MLO View:
  Difference: 1.2
  Status: OPTIMAL


In [9]:
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import json
from typing import Dict, List, Tuple

class MammogramQualityAnalyzer:
    def __init__(self):
        """Initialize the analyzer with MQSA guidelines and tracking data"""
        self.mqsa_guidelines = {
            'intensity': {
                'acceptable_range': (180, 230),
                'target_range': (200, 220)
            },
            'std_dev': {
                'acceptable_range': (70, 90),
                'target_range': (75, 85)
            },
            'symmetry': {
                'acceptable': 20,
                'target': 10
            }
        }
        
        self.tracking_data = {
            'CC': {'L': [], 'R': []},
            'MLO': {'L': [], 'R': []}
        }

    def analyze_study(self, study_data: Dict) -> Dict:
        """Analyze mammogram study and generate quality assessment"""
        analysis = {
            'critical_issues': [],
            'warnings': [],
            'recommendations': [],
            'metrics': {
                'CC': {'L': {}, 'R': {}},
                'MLO': {'L': {}, 'R': {}}
            }
        }

        # Process each image
        for img in study_data['images']:
            view = img['view']
            lat = img['laterality']
            metrics = img['metrics']
            patient_id = img['patient_id']

            # Update tracking data
            self.tracking_data[view][lat].append({
                'patient_id': patient_id,
                'intensity': metrics['mean_intensity'],
                'std_dev': metrics['std_dev'],
                'timestamp': img['timestamp']
            })

            # Analyze metrics
            self._analyze_intensity(analysis, view, lat, metrics['mean_intensity'], patient_id)
            self._analyze_contrast(analysis, view, lat, metrics['std_dev'], patient_id)
            
            # Store metrics
            analysis['metrics'][view][lat] = metrics

        # Analyze symmetry
        self._analyze_symmetry(analysis, study_data['symmetry'])
        self._generate_recommendations(analysis)

        return analysis

    def _analyze_intensity(self, analysis: Dict, view: str, lat: str, 
                         intensity: float, patient_id: str):
        """Analyze image intensity against MQSA guidelines"""
        if intensity < self.mqsa_guidelines['intensity']['acceptable_range'][0]:
            analysis['critical_issues'].append(
                f"Critical low intensity ({intensity:.1f}) in {view}-{lat} for patient {patient_id}"
            )
        elif intensity > self.mqsa_guidelines['intensity']['acceptable_range'][1]:
            analysis['critical_issues'].append(
                f"Critical high intensity ({intensity:.1f}) in {view}-{lat} for patient {patient_id}"
            )
        elif (intensity < self.mqsa_guidelines['intensity']['target_range'][0] or 
              intensity > self.mqsa_guidelines['intensity']['target_range'][1]):
            analysis['warnings'].append(
                f"Suboptimal intensity ({intensity:.1f}) in {view}-{lat}"
            )

    def _analyze_contrast(self, analysis: Dict, view: str, lat: str, 
                         std_dev: float, patient_id: str):
        """Analyze image contrast using standard deviation"""
        if std_dev < self.mqsa_guidelines['std_dev']['acceptable_range'][0]:
            analysis['critical_issues'].append(
                f"Critical low contrast ({std_dev:.1f}) in {view}-{lat} for patient {patient_id}"
            )
        elif std_dev > self.mqsa_guidelines['std_dev']['acceptable_range'][1]:
            analysis['warnings'].append(
                f"High contrast variability ({std_dev:.1f}) in {view}-{lat}"
            )

    def _analyze_symmetry(self, analysis: Dict, symmetry_data: Dict):
        """Analyze symmetry between left and right views"""
        for view, data in symmetry_data.items():
            if data['status'] == 'CRITICAL':
                analysis['critical_issues'].append(
                    f"Critical asymmetry in {view} view (difference: {data['difference']:.1f})"
                )
                analysis['recommendations'].append(
                    f"Review positioning and exposure settings for {view} views"
                )

    def _generate_recommendations(self, analysis: Dict):
        """Generate specific recommendations based on findings"""
        # Intensity-related recommendations
        if any('intensity' in issue.lower() for issue in analysis['critical_issues']):
            analysis['recommendations'].extend([
                "Check exposure settings and adjust as needed",
                "Verify detector calibration",
                "Review positioning technique"
            ])
        
        # Contrast-related recommendations
        if any('contrast' in issue.lower() for issue in analysis['critical_issues']):
            analysis['recommendations'].extend([
                "Verify image processing parameters",
                "Check compression settings",
                "Consider equipment maintenance"
            ])

    def _plot_intensity_trends(self):
        """Plot intensity trends for each view and laterality"""
        colors = {'CC': {'L': 'blue', 'R': 'lightblue'},
                 'MLO': {'L': 'green', 'R': 'lightgreen'}}
        
        for view in ['CC', 'MLO']:
            for lat in ['L', 'R']:
                data = self.tracking_data[view][lat]
                if data:
                    intensities = [d['intensity'] for d in data]
                    plt.plot(intensities, 'o-', label=f'{view}-{lat}',
                            color=colors[view][lat])
        
        plt.axhline(y=self.mqsa_guidelines['intensity']['target_range'][0],
                   color='r', linestyle='--', alpha=0.5)
        plt.axhline(y=self.mqsa_guidelines['intensity']['target_range'][1],
                   color='r', linestyle='--', alpha=0.5)
        
        plt.title('Intensity Trends')
        plt.ylabel('Mean Intensity')
        plt.xlabel('Image Sequence')
        plt.legend()

    def _plot_quality_distribution(self):
        """Plot quality score distribution"""
        quality_scores = []
        labels = []
        
        for view in ['CC', 'MLO']:
            for lat in ['L', 'R']:
                for data in self.tracking_data[view][lat]:
                    intensity = data['intensity']
                    std_dev = data['std_dev']
                    
                    # Calculate quality score
                    score = 100
                    if intensity < self.mqsa_guidelines['intensity']['acceptable_range'][0]:
                        score -= 50
                    elif intensity < self.mqsa_guidelines['intensity']['target_range'][0]:
                        score -= 20
                    
                    if std_dev < self.mqsa_guidelines['std_dev']['acceptable_range'][0]:
                        score -= 30
                    
                    quality_scores.append(score)
                    labels.append(f"{view}-{lat}")
        
        if quality_scores:
            plt.hist(quality_scores, bins=10, color='skyblue', alpha=0.7)
            plt.title('Quality Score Distribution')
            plt.xlabel('Quality Score')
            plt.ylabel('Frequency')

    def _plot_symmetry_tracking(self):
        """Plot symmetry differences over time"""
        cc_diffs = []
        mlo_diffs = []
        
        for view in ['CC', 'MLO']:
            left_data = self.tracking_data[view]['L']
            right_data = self.tracking_data[view]['R']
            
            if left_data and right_data:
                left_intensities = [d['intensity'] for d in left_data]
                right_intensities = [d['intensity'] for d in right_data]
                diffs = [abs(l - r) for l, r in zip(left_intensities, right_intensities)]
                
                if view == 'CC':
                    cc_diffs = diffs
                else:
                    mlo_diffs = diffs
        
        x = range(max(len(cc_diffs), len(mlo_diffs)))
        if cc_diffs:
            plt.plot(x[:len(cc_diffs)], cc_diffs, 'b-', label='CC')
        if mlo_diffs:
            plt.plot(x[:len(mlo_diffs)], mlo_diffs, 'g-', label='MLO')
            
        plt.axhline(y=self.mqsa_guidelines['symmetry']['acceptable'],
                   color='r', linestyle='--', label='Acceptable Limit')
        
        plt.title('Symmetry Differences Over Time')
        plt.xlabel('Image Pair')
        plt.ylabel('Intensity Difference')
        plt.legend()

    def _plot_performance_metrics(self):
        """Plot overall performance metrics"""
        metrics = {
            'Optimal Images': 0,
            'Warning Images': 0,
            'Critical Images': 0
        }
        
        total_images = 0
        for view in ['CC', 'MLO']:
            for lat in ['L', 'R']:
                for data in self.tracking_data[view][lat]:
                    total_images += 1
                    intensity = data['intensity']
                    
                    if (self.mqsa_guidelines['intensity']['target_range'][0] <= 
                        intensity <= 
                        self.mqsa_guidelines['intensity']['target_range'][1]):
                        metrics['Optimal Images'] += 1
                    elif (self.mqsa_guidelines['intensity']['acceptable_range'][0] <= 
                          intensity <= 
                          self.mqsa_guidelines['intensity']['acceptable_range'][1]):
                        metrics['Warning Images'] += 1
                    else:
                        metrics['Critical Images'] += 1
        
        if total_images > 0:
            for key in metrics:
                metrics[key] = (metrics[key] / total_images) * 100
            
            plt.bar(metrics.keys(), metrics.values(),
                   color=['green', 'yellow', 'red'])
            plt.title('Overall Performance Metrics')
            plt.ylabel('Percentage of Images')
            plt.xticks(rotation=45)

    def generate_trending_report(self, output_dir: str):
        """Generate trending analysis visualizations"""
        plt.figure(figsize=(15, 10))
        
        # Create subplots
        plt.subplot(2, 2, 1)
        self._plot_intensity_trends()
        
        plt.subplot(2, 2, 2)
        self._plot_quality_distribution()
        
        plt.subplot(2, 2, 3)
        self._plot_symmetry_tracking()
        
        plt.subplot(2, 2, 4)
        self._plot_performance_metrics()
        
        plt.tight_layout()
        plt.savefig(Path(output_dir) / 'quality_trends.png')
        plt.close()

    def print_detailed_analysis(self, analysis: Dict):
        """Print detailed analysis results"""
        print("\nDetailed Quality Analysis Report")
        print("===============================")
        
        if analysis['critical_issues']:
            print("\nCRITICAL ISSUES:")
            for issue in analysis['critical_issues']:
                print(f"❌ {issue}")
        
        if analysis['warnings']:
            print("\nWARNINGS:")
            for warning in analysis['warnings']:
                print(f"⚠️ {warning}")
        
        print("\nRecommended Actions:")
        if analysis['recommendations']:
            for i, rec in enumerate(analysis['recommendations'], 1):
                print(f"\n{i}. {rec}")
        else:
            print("✅ No immediate actions required")
        
        print("\nQuality Trends:")
        print("---------------")
        for view in ['CC', 'MLO']:
            for lat in ['L', 'R']:
                data = self.tracking_data[view][lat]
                if data:
                    intensities = [d['intensity'] for d in data]
                    print(f"\n{view}-{lat}:")
                    print(f"  Average Intensity: {np.mean(intensities):.1f}")
                    print(f"  Trend: {'STABLE' if np.std(intensities) < 10 else 'VARIABLE'}")

def process_study_data(base_dir: str) -> Dict:
    """Process study data from directory"""
    base_dir = Path(base_dir)
    study_data = {
        'images': [],
        'symmetry': {
            'CC': {'difference': 0, 'status': 'OPTIMAL'},
            'MLO': {'difference': 0, 'status': 'OPTIMAL'}
        }
    }
    
    # Process images
    for view in ['CC', 'MLO']:
        view_dir = base_dir / view
        if view_dir.exists():
            for lat in ['L', 'R']:
                lat_dir = view_dir / lat
                if lat_dir.exists():
                    for img_path in lat_dir.glob('*.png'):
                        if not img_path.stem.endswith('thumb'):
                            img = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)
                            if img is not None:
                                metrics = {
                                    'mean_intensity': img.mean(),
                                    'std_dev': img.std()
                                }
                                study_data['images'].append({
                                    'view': view,
                                    'laterality': lat,
                                    'patient_id': img_path.stem.split('_')[0],
                                    'timestamp': datetime.now().isoformat(),
                                    'metrics': metrics
                                })
    
    # Calculate symmetry
    for view in ['CC', 'MLO']:
        left_intensities = [img['metrics']['mean_intensity'] 
                          for img in study_data['images'] 
                          if img['view'] == view and img['laterality'] == 'L']
        right_intensities = [img['metrics']['mean_intensity'] 
                           for img in study_data['images'] 
                           if img['view'] == view and img['laterality'] == 'R']
        
        if left_intensities and right_intensities:
            difference = abs(np.mean(left_intensities) - np.mean(right_intensities))
            study_data['symmetry'][view] = {
                'difference': difference,
                'status': 'CRITICAL' if difference > 20 else 'OPTIMAL'
            }
    
    return study_data

def run_enhanced_analysis(base_dir: str = "/kaggle/working/processed_images"):
    """Run complete analysis pipeline"""
    print("Processing study data...")
    study_data = process_study_data(base_dir)
    
    print("Analyzing quality metrics...")
    analyzer = MammogramQualityAnalyzer()
    analysis = analyzer.analyze_study(study_data)
    
    print("Generating reports...")
    analyzer.print_detailed_analysis(analysis)
    analyzer.generate_trending_report(base_dir)
    
    print("Analysis complete!")

if __name__ == "__main__":
    run_enhanced_analysis()

Processing study data...
Analyzing quality metrics...
Generating reports...

Detailed Quality Analysis Report

CRITICAL ISSUES:
❌ Critical low intensity (22.6) in CC-L for patient 10011
❌ Critical low contrast (47.3) in CC-L for patient 10011
❌ Critical asymmetry in CC view (difference: 101.1)

⚠️ Suboptimal intensity (220.4) in CC-L
⚠️ Suboptimal intensity (222.7) in CC-R

Recommended Actions:

1. Review positioning and exposure settings for CC views

2. Check exposure settings and adjust as needed

3. Verify detector calibration

4. Review positioning technique

5. Verify image processing parameters

6. Check compression settings

7. Consider equipment maintenance

Quality Trends:
---------------

CC-L:
  Average Intensity: 121.5
  Trend: VARIABLE

CC-R:
  Average Intensity: 222.7
  Trend: STABLE

MLO-L:
  Average Intensity: 211.6
  Trend: STABLE

MLO-R:
  Average Intensity: 212.8
  Trend: STABLE
Analysis complete!


In [10]:
class MammogramMonitor:
    def __init__(self):
        self.quality_thresholds = {
            'intensity': {
                'optimal': (200, 220),
                'acceptable': (180, 230),
                'critical_low': 50,
                'critical_high': 250
            },
            'symmetry': {
                'optimal': 10,
                'acceptable': 20,
                'critical': 50
            },
            'contrast': {
                'minimal': 70,
                'optimal': 80
            }
        }

    def monitor_cc_quality(self, cc_left: dict, cc_right: dict) -> dict:
        """Specific monitoring for CC view issues"""
        return {
            'intensity_check': self._check_cc_intensity(cc_left, cc_right),
            'symmetry_check': self._check_cc_symmetry(cc_left, cc_right),
            'contrast_check': self._check_cc_contrast(cc_left, cc_right)
        }
    
    def _check_cc_intensity(self, cc_left: dict, cc_right: dict) -> dict:
        left_intensity = cc_left['mean_intensity']
        right_intensity = cc_right['mean_intensity']
        
        recommendations = []
        if left_intensity < self.quality_thresholds['intensity']['critical_low']:
            recommendations.append({
                'priority': 'IMMEDIATE',
                'issue': f'Critical low intensity in CC Left: {left_intensity:.1f}',
                'action': 'Increase exposure settings for CC Left view'
            })
        
        if abs(left_intensity - right_intensity) > self.quality_thresholds['symmetry']['critical']:
            recommendations.append({
                'priority': 'HIGH',
                'issue': f'Large intensity difference: {abs(left_intensity - right_intensity):.1f}',
                'action': 'Standardize exposure settings between CC views'
            })
            
        return recommendations
    
    def _check_cc_contrast(self, cc_left: dict, cc_right: dict) -> dict:
        left_std = cc_left['std_dev']
        recommendations = []
        
        if left_std < self.quality_thresholds['contrast']['minimal']:
            recommendations.append({
                'priority': 'HIGH',
                'issue': f'Insufficient contrast in CC Left: {left_std:.1f}',
                'action': [
                    'Check compression paddle alignment',
                    'Verify processing parameters',
                    'Consider equipment calibration'
                ]
            })
        return recommendations
    
    def _check_cc_symmetry(self, cc_left: dict, cc_right: dict) -> dict:
        intensity_diff = abs(cc_left['mean_intensity'] - cc_right['mean_intensity'])
        
        if intensity_diff > self.quality_thresholds['symmetry']['critical']:
            return [{
                'priority': 'HIGH',
                'issue': f'Critical asymmetry between CC views: {intensity_diff:.1f}',
                'action': [
                    'Review positioning protocol',
                    'Check exposure consistency',
                    'Verify equipment calibration'
                ]
            }]
        return []
    
    def generate_focused_report(self, study_data: dict):
        """Generate focused report for identified issues"""
        print("\nFocused Quality Monitoring Report")
        print("================================")
        
        # Extract study data
        cc_left_data = {
            'mean_intensity': 22.6,  # From your actual data
            'std_dev': 47.3         # From your actual data
        }
        
        cc_right_data = {
            'mean_intensity': 222.7,  # From your actual data
            'std_dev': 76.8          # From your actual data
        }
        
        monitoring_results = self.monitor_cc_quality(cc_left_data, cc_right_data)
        
        # Print prioritized recommendations
        print("\nPriority Actions Required:")
        for check_type, issues in monitoring_results.items():
            for issue in issues:
                print(f"\n[{issue['priority']}] {issue['issue']}")
                if isinstance(issue['action'], list):
                    for action in issue['action']:
                        print(f"  • {action}")
                else:
                    print(f"  • {issue['action']}")
        
        # Print quality status
        print("\nQuality Status Summary:")
        print("----------------------")
        print(f"CC Left Status: {'CRITICAL - Needs Immediate Attention' if cc_left_data['mean_intensity'] < 50 else 'Needs Review'}")
        print(f"CC Right Status: {'STABLE' if 180 <= cc_right_data['mean_intensity'] <= 230 else 'Needs Review'}")
        print(f"Symmetry Status: {'CRITICAL' if abs(cc_left_data['mean_intensity'] - cc_right_data['mean_intensity']) > 50 else 'ACCEPTABLE'}")

def create_sample_data():
    """Create sample data structure based on previous analysis"""
    return {
        'images': [
            {
                'view': 'CC',
                'laterality': 'L',
                'patient_id': '10011',
                'metrics': {
                    'mean_intensity': 22.6,
                    'std_dev': 47.3
                }
            },
            {
                'view': 'CC',
                'laterality': 'R',
                'patient_id': '10006',
                'metrics': {
                    'mean_intensity': 222.7,
                    'std_dev': 76.8
                }
            },
            {
                'view': 'MLO',
                'laterality': 'L',
                'patient_id': '10006',
                'metrics': {
                    'mean_intensity': 211.6,
                    'std_dev': 82.5
                }
            },
            {
                'view': 'MLO',
                'laterality': 'R',
                'patient_id': '10006',
                'metrics': {
                    'mean_intensity': 212.8,
                    'std_dev': 82.1
                }
            }
        ],
        'symmetry': {
            'CC': {'difference': 101.1, 'status': 'CRITICAL'},
            'MLO': {'difference': 1.2, 'status': 'OPTIMAL'}
        }
    }

if __name__ == "__main__":
    # Create sample data
    study_data = create_sample_data()
    
    # Run monitoring
    monitor = MammogramMonitor()
    monitor.generate_focused_report(study_data)



Focused Quality Monitoring Report

Priority Actions Required:

[IMMEDIATE] Critical low intensity in CC Left: 22.6
  • Increase exposure settings for CC Left view

[HIGH] Large intensity difference: 200.1
  • Standardize exposure settings between CC views

[HIGH] Critical asymmetry between CC views: 200.1
  • Review positioning protocol
  • Check exposure consistency
  • Verify equipment calibration

[HIGH] Insufficient contrast in CC Left: 47.3
  • Check compression paddle alignment
  • Verify processing parameters
  • Consider equipment calibration

Quality Status Summary:
----------------------
CC Left Status: CRITICAL - Needs Immediate Attention
CC Right Status: STABLE
Symmetry Status: CRITICAL
