In [1]:
import os
import json
import pandas as pd

DIR_LABELS = os.path.join(os.getcwd(), "..", "labels")
EXCEL_OVERVIEW_PELVIS = os.path.join(os.getcwd(), "..", "data/Task1/pelvis/overview/1_pelvis_train.xlsx")

In [2]:
os.listdir(DIR_LABELS)  # List files in the labels directory

['ood_Justin_2023.txt',
 'ood_kai_koepchen.txt',
 'ood_yeyang.txt',
 'labels_raw.json',
 'missing_Ömer.txt',
 'ct_hu_stats_pelvis.json',
 'Justin.txt',
 'ood_weijie.txt']

In [3]:
def parse_index(s):
    if s.strip().lower() == "na":
        return None
    elif s.strip().isdigit():
        return int(s.strip())
    else:
        raise ValueError(f"Invalid index value: {s}")


def process_line(line, abnormal_ids):
    try:
        data = line.strip().split(",")

        if data[5] in abnormal_ids:
            id_ = data[0]
            mr_start = parse_index(data[1])
            mr_end = parse_index(data[2])
            ct_start = parse_index(data[3])
            ct_end = parse_index(data[4])
            if id_.lower().startswith("1b"):
                body_part = "brain"
            elif id_.lower().startswith("1p"):
                body_part = "pelvis"
            else:
                raise ValueError(f"Unknown body part for id: {id_}")

            return {
                id_: {
                    "mr_start": mr_start,
                    "mr_end": mr_end,
                    "ct_start": ct_start,
                    "ct_end": ct_end,
                    "body_part": body_part
                }
            }
    except Exception as e:
        print(f"Error processing line: {e}: {line}")

In [6]:
def process_labels(dir_labels=DIR_LABELS):
    labels = []
    for filename in os.listdir(dir_labels):
        if filename.endswith(".txt") and filename.startswith("ood"):
            file_path = os.path.join(dir_labels, filename)
            with open(file_path, 'r') as file:
                processed_labels = filter(None, [process_line(line, abnormal_ids=["1"]) for line in file])
                labels.extend(processed_labels)
    return labels

In [7]:
labels_1 = process_labels()
labels = {
    "type1": labels_1
}

Error processing line: list index out of range: # some categories updated, confirmed with yeyang

Error processing line: list index out of range: 1BA222,

Error processing line: list index out of range: 1BA227, asymmetric due to positioning but registered correctly, so ID(?)

Error processing line: list index out of range: 1BA266, asymmetric due to positioning but ID

Error processing line: list index out of range: 1BA278,

Error processing line: list index out of range: 1BA300,

Error processing line: list index out of range: 1BA307,

Error processing line: list index out of range: 1BA328,

Error processing line: list index out of range: 1BA345,

Error processing line: list index out of range: 1BA358, asymmetric positioning

Error processing line: list index out of range: 1PA074,

Error processing line: list index out of range: 1PA076,

Error processing line: list index out of range: 1PA081,

Error processing line: list index out of range: 1PA084,

Error processing line: list index ou

In [8]:
with open(os.path.join(DIR_LABELS, "labels_raw.json"), 'w') as f:
    json.dump(labels, f)

In [None]:
len(labels_1)

158

In [None]:
labels_1

[{'1BB031': {'mr_start': 130,
   'mr_end': 167,
   'ct_start': 131,
   'ct_end': 165,
   'body_part': 'brain'}},
 {'1BB033': {'mr_start': 108,
   'mr_end': 122,
   'ct_start': 109,
   'ct_end': 120,
   'body_part': 'brain'}},
 {'1BB033': {'mr_start': 146,
   'mr_end': 166,
   'ct_start': 146,
   'ct_end': 164,
   'body_part': 'brain'}},
 {'1BB043': {'mr_start': 112,
   'mr_end': 120,
   'ct_start': 112,
   'ct_end': 118,
   'body_part': 'brain'}},
 {'1BB043': {'mr_start': 126,
   'mr_end': 138,
   'ct_start': 128,
   'ct_end': 136,
   'body_part': 'brain'}},
 {'1BB043': {'mr_start': 152,
   'mr_end': 164,
   'ct_start': 154,
   'ct_end': 160,
   'body_part': 'brain'}},
 {'1BB050': {'mr_start': 131,
   'mr_end': 147,
   'ct_start': 131,
   'ct_end': 146,
   'body_part': 'brain'}},
 {'1BB050': {'mr_start': 156,
   'mr_end': 169,
   'ct_start': 156,
   'ct_end': 167,
   'body_part': 'brain'}},
 {'1BB059': {'mr_start': 99,
   'mr_end': 114,
   'ct_start': 101,
   'ct_end': 112,
   'body_pa

In [None]:
df_overview_pelvis_ct = pd.read_excel(EXCEL_OVERVIEW_PELVIS, sheet_name="CT")
df_overview_pelvis_mr = pd.read_excel(EXCEL_OVERVIEW_PELVIS, sheet_name="MR")

In [None]:
df_overview_pelvis_mr

Unnamed: 0,ID,Set,Modality,MagneticFieldStrength,StudyDescription,SeriesDescription,ScanningSequence,SequenceVariant,ScanOptions,MRAcquisitionType,...,Columns,PixelSpacing,AcquisitionMatrix,Manufacturer,ManufacturerModelName,SoftwareVersions,Slices,Dim_pre,Spacing_pre,note
0,1PA001,train,MR,1.5,RT MRI CERVIX VARIABELE BLAASVUL,Source Pelvis MRCAT IP,RM,SP,OTHER,3D,...,480,"[1.13750004768371, 1.13750004768371]","[0, 390, 390, 0]",Philips Medical Systems,Ingenia,"['5.6.1', '5.6.1.2']",317.0,"(565, 338, 146)","(1.0, 1.0, 2.5)",
1,1PA004,train,MR,1.5,RT MRI CERVIX VARIABELE BLAASVUL,Source Pelvis MRCAT IP,RM,SP,OTHER,3D,...,480,"[1.13750004768371, 1.13750004768371]","[0, 390, 390, 0]",Philips Medical Systems,Ingenia,"['5.6.1', '5.6.1.2']",317.0,"(457, 263, 149)","(1.0, 1.0, 2.5)",
2,1PA005,train,MR,1.5,RT MRI CERVIX VARIABELE BLAASVUL,Source Pelvis MRCAT IP,RM,SP,OTHER,3D,...,480,"[1.13750004768371, 1.13750004768371]","[0, 390, 390, 0]",Philips Medical Systems,Ingenia,"['5.6.1', '5.6.1.0']",317.0,"(586, 410, 151)","(1.0, 1.0, 2.5)",
3,1PA009,train,MR,1.5,RT MRI CERVIX VARIABELE BLAASVUL,Source Pelvis MRCAT IP,RM,SP,OTHER,3D,...,480,"[1.13750004768371, 1.13750004768371]","[0, 390, 390, 0]",Philips Medical Systems,Ingenia,"['5.6.1', '5.6.1.0']",317.0,"(470, 299, 134)","(1.0, 1.0, 2.5)",
4,1PA010,train,MR,1.5,RT MRI CERVIX VARIABELE BLAASVUL,Source Pelvis MRCAT IP,RM,SP,OTHER,3D,...,480,"[1.13750004768371, 1.13750004768371]","[0, 390, 390, 0]",Philips Medical Systems,Ingenia,"['5.6.1', '5.6.1.0']",317.0,"(444, 312, 136)","(1.0, 1.0, 2.5)",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,1PC093,train,MR,3.0,,,SE,"['SK', 'SP', 'OSP']",PFP,3D,...,384,"[1.171875, 1.171875]","[384, 0, 0, 262]",,,syngo MR E11,,,,
176,1PC095,train,MR,3.0,,,SE,"['SK', 'SP', 'OSP']",PFP,3D,...,384,"[1.171875, 1.171875]","[384, 0, 0, 262]",,,syngo MR E11,,,,
177,1PC096,train,MR,3.0,,,SE,"['SK', 'SP', 'OSP']",PFP,3D,...,384,"[1.171875, 1.171875]","[384, 0, 0, 262]",,,syngo MR E11,,,,
178,1PC097,train,MR,3.0,,,SE,"['SK', 'SP', 'OSP']",PFP,3D,...,384,"[1.171875, 1.171875]","[384, 0, 0, 262]",,,syngo MR E11,,,,
