In [None]:
import os
from pathlib import Path
import pydicom
import pandas as pd
from datetime import datetime

# Function to convert DICOM time to the standard HH:MM:SS.fff format
def convert_dicom_time(dicom_time_str):
    """Convert a DICOM TM value to ``HH:MM:SS.<fraction>`` text.

    Accepted inputs are ``HH``, ``HHMM`` or ``HHMMSS``, optionally followed
    by ``.`` and a run of fractional-second digits. Omitted minutes or
    seconds are taken as ``00``; a missing or empty fraction becomes ``000``.
    Surrounding whitespace is ignored.

    Args:
        dicom_time_str: The raw time value. Non-string values are converted
            with ``str()`` before parsing.

    Returns:
        The reformatted time string, or ``None`` when the value is ``None``
        or does not match the accepted layout.
    """
    if dicom_time_str is None:
        return None

    text = str(dicom_time_str).strip()
    whole, _, fraction = text.partition(".")

    # "120000." has a separator but no digits; fall back to the same default
    # used when there is no fractional part at all, so the result stays
    # parseable by "%H:%M:%S.%f".
    fraction = fraction or "000"

    # Reject anything that is not purely numeric (e.g. "12:34:56") instead of
    # slicing it into a meaningless time.
    if not whole.isdigit() or not fraction.isdigit():
        return None
    if len(whole) not in (2, 4, 6):
        return None

    # Omitted trailing components (minutes, seconds) default to zero.
    whole = whole.ljust(6, "0")
    hours, minutes, seconds = whole[:2], whole[2:4], whole[4:]
    return f"{hours}:{minutes}:{seconds}.{fraction}"

# Function to calculate scan duration, including milliseconds
def calculate_scan_duration(start_time_str, end_time_str):
    """Return the elapsed time between two ``HH:MM:SS.fff`` clock strings.

    Both arguments are parsed with the ``%H:%M:%S.%f`` format. When the end
    time is earlier than the start time, the end is treated as falling on the
    following day.

    Args:
        start_time_str: Clock time at which the scan started.
        end_time_str: Clock time at which the scan ended.

    Returns:
        ``str()`` of the resulting ``timedelta`` when it is positive,
        ``"0:00:00"`` when it is not, and ``"Invalid duration"`` (after
        printing the error) when anything in the computation raises.
    """
    try:
        began, finished = (
            datetime.strptime(stamp, "%H:%M:%S.%f")
            for stamp in (start_time_str, end_time_str)
        )

        # An end before the start means the scan ran past midnight, so the
        # end is shifted onto the next calendar day.
        if finished < began:
            finished = finished.replace(day=began.day + 1)

        elapsed = finished - began
        if elapsed.total_seconds() > 0:
            return str(elapsed)
        return "0:00:00"
    except Exception as e:
        print(f"Error calculating duration: {e}")
        return "Invalid duration"

# Function to read DICOM metadata
def read_dicom_metadata(filepath):
    """Read a DICOM file's header, skipping the pixel data.

    Args:
        filepath: Path of the file to read.

    Returns:
        The dataset returned by pydicom, or ``None`` if reading raised.
        The error is printed rather than propagated, so one unreadable
        file does not abort a whole scan.
    """
    try:
        # dcmread is the supported reader; read_file was only a deprecated
        # alias for it and is no longer available in pydicom 3.x.
        return pydicom.dcmread(filepath, stop_before_pixels=True)
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary around
        # arbitrary files found on disk.
        print(f"Error reading DICOM metadata: {e}")
        return None

# Function to extract metadata from a series of DICOM files
def get_metadata(dicom_files):
    """Build one metadata record for a series of DICOM files.

    Only the first and last entries of ``dicom_files`` are read. Descriptive
    fields come from the first file; the series start time is the first
    file's ``SeriesTime`` and the end time is the last file's ``ContentTime``.
    Attributes missing from a file are reported as the string ``"None"``.

    Args:
        dicom_files: Sequence of file paths belonging to one series. The
            caller decides the order, and therefore which files count as
            "first" and "last".

    Returns:
        A dict describing the series, or ``None`` when fewer than two files
        are given or either of the two files could not be read.
    """
    if not dicom_files or len(dicom_files) < 2:
        return None

    img_first = read_dicom_metadata(dicom_files[0])
    img_last = read_dicom_metadata(dicom_files[-1])

    # Compare against None explicitly so the outcome does not depend on how
    # the dataset object evaluates in a boolean context.
    if img_first is None or img_last is None:
        return None

    def first_tag(attribute):
        """Return an attribute of the first file, or "None" when absent."""
        return getattr(img_first, attribute, "None")

    def is_usable(clock_text):
        """Tell whether a converted time can be fed to the duration calculation."""
        return bool(clock_text) and clock_text != "None"

    raw_start_time = first_tag('SeriesTime')
    raw_end_time = getattr(img_last, 'ContentTime', "None")

    # Convert times to HH:MM:SS.fff format; a missing end time falls back to
    # the start time, which yields a zero duration.
    start_time = convert_dicom_time(raw_start_time) if raw_start_time != "None" else "None"
    end_time = convert_dicom_time(raw_end_time) if raw_end_time != "None" else start_time

    # Both the None returned for an unparseable time and the "None"
    # placeholder for a missing one mean "no time available". Neither may
    # reach the duration calculation, where it would only produce a parse
    # error instead of the zero-duration fallback.
    if is_usable(start_time) and is_usable(end_time):
        duration = calculate_scan_duration(start_time, end_time)
    else:
        duration = "0:00:00"

    file_total = len(dicom_files)
    return {
        'name': first_tag('SeriesDescription'),
        'ID': first_tag('PatientID'),
        'imgID': first_tag('AccessionNumber'),
        'study_date': first_tag('StudyDate'),
        'start_time': start_time,
        'end_time': end_time,
        'duration': duration,  # Duration calculated from SeriesTime and ContentTime
        'TE': first_tag('EchoTime'),
        'TR': first_tag('RepetitionTime'),
        'Bandwidth': first_tag('PixelBandwidth'),
        'slice_thickness': first_tag('SliceThickness'),
        'num_slices': file_total,
        'manufacturer': first_tag('Manufacturer'),
        'modality': first_tag('Modality'),
        'gender': first_tag('PatientSex'),
        'age': first_tag('PatientAge'),
        'height': first_tag('PatientSize'),
        'weight': first_tag('PatientWeight'),
        'file_count': file_total
    }

# Function to export metadata to an Excel file
def export_metadata_to_excel(metadata_list, excel_path):
    """Write the collected series records to an Excel workbook.

    Args:
        metadata_list: Records to tabulate; passed directly to
            ``pd.DataFrame``.
        excel_path: Destination handed to ``DataFrame.to_excel``.
    """
    # index=False keeps the DataFrame's row index out of the sheet.
    pd.DataFrame(metadata_list).to_excel(excel_path, index=False)
    print(f"Metadata exported to {excel_path}")

# Main function to extract series metadata
def _natural_sort_key(path):
    """Sort key ordering digit runs in a file name numerically (IM2 before IM10)."""
    import re  # local import: only this helper needs it

    pieces = re.split(r"(\d+)", path.name)
    # re.split with a capturing group alternates text / digit-run, so the
    # odd positions are always the numeric parts.
    return [int(piece) if index % 2 else piece.lower()
            for index, piece in enumerate(pieces)]


def series_extract(dcmpath, excel_savepath):
    """Collect metadata for every series under ``dcmpath`` and export it.

    The expected layout is ``dcmpath/<patient folder>/<series folder>/<files>``.
    Hidden entries (names starting with ``.``) are ignored at every level,
    and series whose description contains "processed" (case-insensitive)
    are left out of the export.

    Args:
        dcmpath: Root directory containing one folder per patient.
        excel_savepath: Path of the Excel file to write.
    """
    dcmpath = Path(dcmpath)

    metadata_list = []

    # Sorting makes the row order of the export reproducible between runs.
    for patient_folder in sorted(dcmpath.iterdir()):
        if not patient_folder.is_dir() or patient_folder.name.startswith('.'):
            continue

        # Series folders (e.g., SE0, SE1, etc.) for the patient
        for series_folder in sorted(patient_folder.iterdir()):
            if not series_folder.is_dir() or series_folder.name.startswith('.'):
                continue

            # Keep regular, non-hidden files only: sub-directories and files
            # such as .DS_Store would otherwise inflate the file count or be
            # picked as the first/last file of the series.
            # NOTE(review): file-name order is assumed to follow acquisition
            # order when choosing the first and last file - confirm for this
            # dataset.
            dicom_files = sorted(
                (entry for entry in series_folder.iterdir()
                 if entry.is_file() and not entry.name.startswith('.')),
                key=_natural_sort_key,
            )

            metadata = get_metadata(dicom_files)
            if not metadata:
                continue

            # str() guards against a description that is not a plain string.
            if 'processed' in str(metadata['name']).lower():
                continue  # Filter out series containing "processed"

            metadata_list.append(metadata)

    export_metadata_to_excel(metadata_list, excel_savepath)

if __name__ == "__main__":
    # Input root (one folder per patient) and the Excel report to produce.
    dcmpath, excel_savepath = (
        "/Volumes/Extreme_zzl/DICOM/",
        "/Volumes/Extreme_zzl/MRCP_metadata_filtered_no_processed.xlsx",
    )

    series_extract(dcmpath=dcmpath, excel_savepath=excel_savepath)

Metadata exported to /Volumes/Extreme_zzl/MRCP_metadata_filtered_no_processed.xlsx
