In [3]:
import os
from pathlib import Path
import pydicom
import pandas as pd
from datetime import datetime

# 将DICOM时间转换为标准HH:MM:SS.fff格式的函数
def convert_dicom_time(dicom_time_str):
    """Convert a DICOM time value into an ``HH:MM:SS.fff``-style string.

    Accepted inputs are ``HHMMSS`` optionally followed by ``.`` and up to six
    fractional digits, plus the truncated forms ``HH`` and ``HHMM`` (missing
    components are filled with ``00``). Surrounding whitespace is ignored.

    Args:
        dicom_time_str: The raw time value. Non-string values are passed
            through ``str()`` before parsing.

    Returns:
        The formatted time string; the fractional part is copied verbatim and
        defaults to ``"000"`` when absent or empty. Returns ``None`` when the
        input is ``None``, empty, contains non-digit characters, or has an
        unsupported length.
    """
    if dicom_time_str is None:
        return None

    # partition() splits on the first dot only, so a value with several dots
    # leaves a dot in `fraction` and is rejected by the digit check below.
    whole, _, fraction = str(dicom_time_str).strip().partition(".")

    digits = "0123456789"
    if any(ch not in digits for ch in whole + fraction):
        return None
    # More than six fractional digits cannot be parsed by strptime's %f.
    if len(fraction) > 6:
        return None

    if len(whole) == 6:
        hours, minutes, seconds = whole[:2], whole[2:4], whole[4:]
    elif len(whole) in (2, 4) and not fraction:
        # Truncated forms: a fraction is only meaningful when seconds exist.
        hours = whole[:2]
        minutes = whole[2:4] or "00"
        seconds = "00"
    else:
        return None

    # An empty fraction (no dot, or a trailing dot) becomes 0 milliseconds.
    milliseconds = fraction or "000"
    return f"{hours}:{minutes}:{seconds}.{milliseconds}"

# 计算扫描持续时间的函数，包含毫秒
def calculate_scan_duration(start_time_str, end_time_str):
    """Return the elapsed time between two ``HH:MM:SS.fff`` clock times.

    Only times of day are compared, so an end time earlier than the start
    time is taken to mean the scan ran past midnight and one day is added.

    Args:
        start_time_str: Start time formatted as ``%H:%M:%S.%f``.
        end_time_str: End time formatted as ``%H:%M:%S.%f``.

    Returns:
        ``str(timedelta)`` of the duration (e.g. ``"0:01:30.500000"``), or
        ``"Invalid duration"`` when either argument cannot be parsed. A
        message describing the parse failure is printed in that case.
    """
    # Local import: the file-level import only brings `datetime` into scope.
    from datetime import timedelta

    time_format = "%H:%M:%S.%f"
    try:
        start_time = datetime.strptime(start_time_str, time_format)
        end_time = datetime.strptime(end_time_str, time_format)
    except (TypeError, ValueError) as e:
        # TypeError: a non-string argument; ValueError: text that does not
        # match the expected format.
        print(f"Error calculating duration: {e}")
        return "Invalid duration"

    duration = end_time - start_time
    if duration < timedelta(0):
        # End earlier than start: the scan crossed midnight.
        duration += timedelta(days=1)

    return str(duration)

def read_dicom_metadata(filepath):
    """Read the header of a DICOM file without loading its pixel data.

    Args:
        filepath: Path of the file to read.

    Returns:
        The object returned by ``pydicom.dcmread``, or ``None`` when reading
        fails for any reason (the error is printed).
    """
    try:
        # dcmread is the supported reader; read_file was a deprecated alias
        # that newer pydicom releases no longer provide.
        return pydicom.dcmread(filepath, stop_before_pixels=True)
    except Exception as e:
        # Deliberately broad: callers pass arbitrary directory entries, so any
        # unreadable or non-DICOM file is reported and skipped, not fatal.
        print(f"Error reading DICOM metadata: {e}")
        return None

def get_metadata(dicom_files):
    """Summarise one series from the headers of its first and last files.

    The start time is the first file's SeriesTime and the end time is the
    last file's ContentTime, so the order of ``dicom_files`` decides which
    files supply them. All other fields come from the first file.

    Args:
        dicom_files: Sequence of file paths belonging to one series.

    Returns:
        A dict of series and patient fields, or ``None`` when fewer than two
        files are given or either header cannot be read. Header attributes
        that are absent are reported as the string ``"None"``. ``duration``
        is ``"Invalid duration"`` whenever a usable start or end time is not
        available.
    """
    if not dicom_files or len(dicom_files) < 2:
        return None

    # Only the first and last headers are needed.
    img_first = read_dicom_metadata(dicom_files[0])
    img_last = read_dicom_metadata(dicom_files[-1])

    # read_dicom_metadata signals failure with None; test for that explicitly
    # so a successfully read header is never discarded for being falsy.
    if img_first is None or img_last is None:
        return None

    start_time = _header_time(img_first, 'SeriesTime')
    end_time = _header_time(img_last, 'ContentTime')
    if end_time == "None":
        # No end time recorded: fall back to the start time.
        end_time = start_time

    # Only hand real converted times to the duration calculation. The "None"
    # placeholder is a non-empty string, so a plain truthiness test would let
    # it through and trigger a parse error.
    unusable = (None, "None")
    if start_time in unusable or end_time in unusable:
        duration = "Invalid duration"
    else:
        duration = calculate_scan_duration(start_time, end_time)

    file_count = len(dicom_files)
    return {
        'name': getattr(img_first, 'SeriesDescription', "None"),
        'ID': getattr(img_first, 'PatientID', "None"),
        'imgID': getattr(img_first, 'AccessionNumber', "None"),
        'study_date': getattr(img_first, 'StudyDate', "None"),
        'start_time': start_time,
        'end_time': end_time,
        'duration': duration,  # derived from SeriesTime and ContentTime
        'TE': getattr(img_first, 'EchoTime', "None"),
        'TR': getattr(img_first, 'RepetitionTime', "None"),
        'Bandwidth': getattr(img_first, 'PixelBandwidth', "None"),
        'slice_thickness': getattr(img_first, 'SliceThickness', "None"),
        'num_slices': file_count,
        'manufacturer': getattr(img_first, 'Manufacturer', "None"),
        'modality': getattr(img_first, 'Modality', "None"),
        'gender': getattr(img_first, 'PatientSex', "None"),
        'age': getattr(img_first, 'PatientAge', "None"),
        'height': getattr(img_first, 'PatientSize', "None"),
        'weight': getattr(img_first, 'PatientWeight', "None"),
        'file_count': file_count,
    }


def _header_time(dataset, keyword):
    """Return the converted time stored under ``keyword``, or ``"None"`` if absent.

    A value that is present but cannot be converted yields whatever
    ``convert_dicom_time`` returns for it.
    """
    raw_value = getattr(dataset, keyword, "None")
    # Treat an attribute holding None the same as a missing attribute.
    if raw_value is None or raw_value == "None":
        return "None"
    return convert_dicom_time(str(raw_value))

def export_metadata_to_excel(metadata_list, excel_path):
    """Write the collected metadata records to an Excel file.

    Args:
        metadata_list: Records to tabulate; passed directly to
            ``pd.DataFrame``.
        excel_path: Destination path of the workbook.
    """
    # The DataFrame index is left out of the written sheet.
    pd.DataFrame(metadata_list).to_excel(excel_path, index=False)
    print(f"Metadata exported to {excel_path}")

def series_extract(dcmpath, excel_savepath):
    """Collect metadata for every series under ``dcmpath`` and export it to Excel.

    The expected layout is ``dcmpath/<patient>/<series>/<files>``. Hidden
    entries (names starting with ``.``) are skipped at every level, and series
    whose description contains "processed" (case-insensitive) are left out.

    Patients, series and files are visited in natural name order so the output
    is deterministic and the first/last files handed to ``get_metadata`` are
    the lowest- and highest-numbered ones.
    NOTE(review): this assumes file-name order follows acquisition order —
    confirm for this dataset.

    Args:
        dcmpath: Root directory containing one folder per patient.
        excel_savepath: Path of the Excel file to write.
    """
    dcmpath = Path(dcmpath)
    metadata_list = []

    for patient_folder in sorted(dcmpath.iterdir(), key=_natural_sort_key):
        if not patient_folder.is_dir() or patient_folder.name.startswith('.'):
            continue

        # Series folders of this patient (e.g. SE0, SE1, ...).
        for series_folder in sorted(patient_folder.iterdir(), key=_natural_sort_key):
            if not series_folder.is_dir() or series_folder.name.startswith('.'):
                continue

            # Keep only visible regular files: a stray hidden file or nested
            # folder in first/last position would otherwise make the header
            # read fail and drop the whole series.
            dicom_files = sorted(
                (
                    entry
                    for entry in series_folder.iterdir()
                    if entry.is_file() and not entry.name.startswith('.')
                ),
                key=_natural_sort_key,
            )

            metadata = get_metadata(dicom_files)
            if not metadata:
                continue

            # Skip series whose description marks them as processed.
            if 'processed' not in str(metadata['name']).lower():
                metadata_list.append(metadata)

    export_metadata_to_excel(metadata_list, excel_savepath)


def _natural_sort_key(path):
    """Sort key ordering embedded numbers numerically (``IM2`` before ``IM10``)."""
    import re  # local import: `re` is not among the file-level imports

    # Splitting on a captured digit run yields text at even positions and
    # digits at odd positions, so like is always compared with like.
    pieces = re.split(r'(\d+)', path.name)
    key = [int(piece) if index % 2 else piece.lower() for index, piece in enumerate(pieces)]
    # The raw name breaks ties between names differing only in letter case.
    return key, path.name

if __name__ == "__main__":
    # Hard-coded input root; series_extract treats each sub-folder as a patient.
    dcmpath = "/Volumes/Extreme_zzl/DICOM/"
    # Destination workbook for the collected per-series metadata.
    excel_savepath = "/Volumes/Extreme_zzl/MRCP_metadata_filtered_no_processed.xlsx"
    
    # Walk the tree, gather metadata and write the Excel file.
    series_extract(dcmpath, excel_savepath)

Metadata exported to /Volumes/Extreme_zzl/MRCP_metadata_filtered_no_processed.xlsx
