In [1]:
from pathlib import Path

In [2]:
def _mean_box_size(boxes):
    """Return ``(mean width, mean height)`` of ``boxes``, or ``None`` if it is empty."""
    if not boxes:
        return None
    count = len(boxes)
    return sum(w for w, _ in boxes) / count, sum(h for _, h in boxes) / count


def explore_labels(dataset: Path, patch_size: int, train_split: bool = False):
    """Print how many cells are labelled and the mean box size per cell class.

    Files read, all relative to ``dataset``:

    * ``images/`` - one entry per patch; iterated when ``train_split`` is False.
    * ``autosplit_train.txt`` - one patch path per line (joined onto ``dataset``);
      read instead of ``images/`` when ``train_split`` is True.
    * ``labels/<patch stem>.txt`` - whitespace-separated fields per line. Field 0
      is the class id (0 is collected as lymphocyte, 1 as monocyte, anything else
      is ignored); fields 3 and 4 are multiplied by ``patch_size`` and reported
      as width and height. NOTE(review): this looks like the YOLO
      ``class x y w h`` normalised box format - confirm.
    * ``annotations/<patch stem>.txt`` - every line is counted as one cell for
      the denominator of the reported percentage.

    Args:
        dataset: Root directory of the dataset.
        patch_size: Factor applied to the width/height fields of each label.
        train_split: Restrict the statistics to the patches listed in
            ``autosplit_train.txt``.

    Returns:
        None. Three summary lines are written to stdout.

    Raises:
        FileNotFoundError: If a label or annotation file of a patch is missing.
    """
    if train_split:
        with open(dataset / 'autosplit_train.txt') as f:
            # Strip line terminators and ignore blank lines (e.g. a trailing
            # empty line), which would otherwise turn into a bogus patch path.
            patches = [Path(dataset / entry.strip()) for entry in f if entry.strip()]
    else:
        patches = dataset.joinpath('images').iterdir()

    total_cells = 0
    lymphocytes = []
    monocytes = []
    for patch_path in patches:
        # Label and annotation files share the patch's stem.
        txt_name = patch_path.with_suffix('.txt').name

        with open(dataset / 'labels' / txt_name) as f:
            for line in f:
                label = line.split()
                if not label:
                    # Blank line: nothing to parse.
                    continue
                wh = (float(label[3]) * patch_size, float(label[4]) * patch_size)
                class_id = int(label[0])
                if class_id == 0:
                    lymphocytes.append(wh)
                elif class_id == 1:
                    monocytes.append(wh)

        with open(dataset / 'annotations' / txt_name) as f:
            total_cells += sum(1 for _ in f)

    labelled = len(lymphocytes) + len(monocytes)
    # Guard the percentage against datasets without any annotation line.
    percent = f'{100 * labelled / total_cells:.1f}%' if total_cells else 'n/a'
    scope = ' (only train split)' if train_split else ''
    print(f'Cell number{scope}: {labelled} = {percent} (/{total_cells})')

    for cell_type, boxes in (('Lymphocyte', lymphocytes), ('Monocyte', monocytes)):
        dims = _mean_box_size(boxes)
        if dims is None:
            # A class without any box has no average; report that instead of crashing.
            print(f'Average {cell_type} Dimensions (wxh): n/a (no labels)')
        else:
            print(f'Average {cell_type} Dimensions (wxh): {dims[0]:.1f} x {dims[1]:.1f}')

In [3]:
# Section banner: the cells that follow analyse the datasets under ../data/basic_box.
print('BASIC BOX')

BASIC BOX


In [4]:
# Label statistics over every patch in images/ of basic_box/pas-cpg512 (boxes scaled by 512).
explore_labels(dataset=Path('../data/basic_box/pas-cpg512'), patch_size=512)

Cell number: 56752 = 100.0% (/56752)
Average Lymphocyte Dimensions (wxh): 36.5 x 36.5
Average Monocyte Dimensions (wxh): 60.2 x 60.1


In [5]:
# Label statistics over every patch in images/ of basic_box/pas-cpg256 (boxes scaled by 256).
explore_labels(dataset=Path('../data/basic_box/pas-cpg256'), patch_size=256)

Cell number: 72665 = 100.0% (/72665)
Average Lymphocyte Dimensions (wxh): 35.9 x 35.8
Average Monocyte Dimensions (wxh): 58.2 x 58.3


In [6]:
# Label statistics over every patch in images/ of basic_box/pas-cpg128 (boxes scaled by 128).
explore_labels(dataset=Path('../data/basic_box/pas-cpg128'), patch_size=128)

Cell number: 81717 = 100.0% (/81717)
Average Lymphocyte Dimensions (wxh): 34.5 x 34.5
Average Monocyte Dimensions (wxh): 54.4 x 54.5


In [7]:
# Label statistics over every patch in images/ of basic_box/ihc512 (boxes scaled by 512).
explore_labels(dataset=Path('../data/basic_box/ihc512'), patch_size=512)

Cell number: 56752 = 100.0% (/56752)
Average Lymphocyte Dimensions (wxh): 36.5 x 36.5
Average Monocyte Dimensions (wxh): 60.2 x 60.1


In [8]:
# Label statistics over every patch in images/ of basic_box/ihc256 (boxes scaled by 256).
explore_labels(dataset=Path('../data/basic_box/ihc256'), patch_size=256)

Cell number: 72665 = 100.0% (/72665)
Average Lymphocyte Dimensions (wxh): 35.9 x 35.8
Average Monocyte Dimensions (wxh): 58.2 x 58.3


In [9]:
# Label statistics over every patch in images/ of basic_box/ihc128 (boxes scaled by 128).
explore_labels(dataset=Path('../data/basic_box/ihc128'), patch_size=128)

Cell number: 81717 = 100.0% (/81717)
Average Lymphocyte Dimensions (wxh): 34.5 x 34.5
Average Monocyte Dimensions (wxh): 54.4 x 54.5


In [10]:
# Section banner: the cells that follow analyse the datasets under ../data/pure_seg_box.
print('PURE SEG BOX')

PURE SEG BOX


In [11]:
# pure_seg_box/pas-cpg512, limited to the patches listed in autosplit_train.txt (boxes scaled by 512).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg512'), patch_size=512, train_split=True)

Cell number (only train split): 34186 = 75.5% (/45265)
Average Lymphocyte Dimensions (wxh): 17.8 x 18.5
Average Monocyte Dimensions (wxh): 21.0 x 21.9


In [12]:
# pure_seg_box/pas-cpg256, limited to the patches listed in autosplit_train.txt (boxes scaled by 256).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg256'), patch_size=256, train_split=True)

Cell number (only train split): 52715 = 90.5% (/58239)
Average Lymphocyte Dimensions (wxh): 16.7 x 17.2
Average Monocyte Dimensions (wxh): 18.8 x 19.6


In [13]:
# pure_seg_box/pas-cpg128, limited to the patches listed in autosplit_train.txt (boxes scaled by 128).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg128'), patch_size=128, train_split=True)

Cell number (only train split): 60976 = 93.3% (/65377)
Average Lymphocyte Dimensions (wxh): 14.8 x 15.1
Average Monocyte Dimensions (wxh): 16.1 x 16.6


In [14]:
# pure_seg_box/ihc512, limited to the patches listed in autosplit_train.txt (boxes scaled by 512).
explore_labels(dataset=Path('../data/pure_seg_box/ihc512'), patch_size=512, train_split=True)

Cell number (only train split): 33980 = 75.1% (/45265)
Average Lymphocyte Dimensions (wxh): 29.0 x 29.6
Average Monocyte Dimensions (wxh): 26.8 x 27.5


In [15]:
# pure_seg_box/ihc256, limited to the patches listed in autosplit_train.txt (boxes scaled by 256).
explore_labels(dataset=Path('../data/pure_seg_box/ihc256'), patch_size=256, train_split=True)

Cell number (only train split): 44694 = 76.7% (/58239)
Average Lymphocyte Dimensions (wxh): 25.9 x 26.3
Average Monocyte Dimensions (wxh): 23.4 x 23.8


In [16]:
# pure_seg_box/ihc128, limited to the patches listed in autosplit_train.txt (boxes scaled by 128).
explore_labels(dataset=Path('../data/pure_seg_box/ihc128'), patch_size=128, train_split=True)

Cell number (only train split): 46166 = 70.6% (/65377)
Average Lymphocyte Dimensions (wxh): 20.6 x 21.6
Average Monocyte Dimensions (wxh): 19.5 x 20.4


In [17]:
# Section banner: the cells that follow analyse the datasets under ../data/seg_box.
print('SEG BOX')

SEG BOX


In [18]:
# Label statistics over every patch in images/ of seg_box/pas-cpg512 (boxes scaled by 512).
explore_labels(dataset=Path('../data/seg_box/pas-cpg512'), patch_size=512)

Cell number: 56752 = 100.0% (/56752)
Average Lymphocyte Dimensions (wxh): 22.0 x 22.6
Average Monocyte Dimensions (wxh): 31.9 x 32.7


In [19]:
# Label statistics over every patch in images/ of seg_box/pas-cpg256 (boxes scaled by 256).
explore_labels(dataset=Path('../data/seg_box/pas-cpg256'), patch_size=256)

Cell number: 72665 = 100.0% (/72665)
Average Lymphocyte Dimensions (wxh): 18.3 x 18.9
Average Monocyte Dimensions (wxh): 23.5 x 24.3


In [20]:
# Label statistics over every patch in images/ of seg_box/pas-cpg128 (boxes scaled by 128).
explore_labels(dataset=Path('../data/seg_box/pas-cpg128'), patch_size=128)

Cell number: 81717 = 100.0% (/81717)
Average Lymphocyte Dimensions (wxh): 15.9 x 16.1
Average Monocyte Dimensions (wxh): 20.0 x 20.4


In [21]:
# Section banner: the remaining cells analyse the *_pad5 and *_pad10 dataset variants.
print('PADDING')

PADDING


In [22]:
# Sub-section banner: padded variants of the datasets under ../data/pure_seg_box.
print('PURE SEG BOX')

PURE SEG BOX


In [23]:
# pure_seg_box/pas-cpg512_pad5, limited to the patches listed in autosplit_train.txt (boxes scaled by 512).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg512_pad5'), patch_size=512, train_split=True)

Cell number (only train split): 34181 = 75.5% (/45265)
Average Lymphocyte Dimensions (wxh): 27.5 x 28.2
Average Monocyte Dimensions (wxh): 30.7 x 31.6


In [24]:
# pure_seg_box/pas-cpg256_pad5, limited to the patches listed in autosplit_train.txt (boxes scaled by 256).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg256_pad5'), patch_size=256, train_split=True)

Cell number (only train split): 52715 = 90.5% (/58239)
Average Lymphocyte Dimensions (wxh): 26.2 x 26.8
Average Monocyte Dimensions (wxh): 28.3 x 29.1


In [25]:
# pure_seg_box/pas-cpg128_pad5, limited to the patches listed in autosplit_train.txt (boxes scaled by 128).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg128_pad5'), patch_size=128, train_split=True)

Cell number (only train split): 60976 = 93.3% (/65377)
Average Lymphocyte Dimensions (wxh): 24.0 x 24.3
Average Monocyte Dimensions (wxh): 25.2 x 25.7


In [26]:
# pure_seg_box/pas-cpg512_pad10, limited to the patches listed in autosplit_train.txt (boxes scaled by 512).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg512_pad10'), patch_size=512, train_split=True)

Cell number (only train split): 34181 = 75.5% (/45265)
Average Lymphocyte Dimensions (wxh): 37.2 x 37.8
Average Monocyte Dimensions (wxh): 40.2 x 41.1


In [27]:
# pure_seg_box/pas-cpg256_pad10, limited to the patches listed in autosplit_train.txt (boxes scaled by 256).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg256_pad10'), patch_size=256, train_split=True)

Cell number (only train split): 52715 = 90.5% (/58239)
Average Lymphocyte Dimensions (wxh): 35.6 x 36.1
Average Monocyte Dimensions (wxh): 37.5 x 38.3


In [28]:
# pure_seg_box/pas-cpg128_pad10, limited to the patches listed in autosplit_train.txt (boxes scaled by 128).
explore_labels(dataset=Path('../data/pure_seg_box/pas-cpg128_pad10'), patch_size=128, train_split=True)

Cell number (only train split): 60976 = 93.3% (/65377)
Average Lymphocyte Dimensions (wxh): 32.8 x 33.0
Average Monocyte Dimensions (wxh): 33.9 x 34.3


In [29]:
# Sub-section banner: padded variants of the datasets under ../data/seg_box.
print('SEG BOX')

SEG BOX


In [30]:
# Label statistics over every patch in images/ of seg_box/pas-cpg512_pad5 (boxes scaled by 512).
explore_labels(dataset=Path('../data/seg_box/pas-cpg512_pad5'), patch_size=512)

Cell number: 56752 = 100.0% (/56752)
Average Lymphocyte Dimensions (wxh): 29.6 x 30.2
Average Monocyte Dimensions (wxh): 39.0 x 39.7


In [31]:
# Label statistics over every patch in images/ of seg_box/pas-cpg256_pad5 (boxes scaled by 256).
explore_labels(dataset=Path('../data/seg_box/pas-cpg256_pad5'), patch_size=256)

Cell number: 72665 = 100.0% (/72665)
Average Lymphocyte Dimensions (wxh): 27.1 x 27.6
Average Monocyte Dimensions (wxh): 31.9 x 32.7


In [32]:
# Label statistics over every patch in images/ of seg_box/pas-cpg128_pad5 (boxes scaled by 128).
explore_labels(dataset=Path('../data/seg_box/pas-cpg128_pad5'), patch_size=128)

Cell number: 81717 = 100.0% (/81717)
Average Lymphocyte Dimensions (wxh): 24.6 x 24.9
Average Monocyte Dimensions (wxh): 28.2 x 28.6


In [33]:
# Label statistics over every patch in images/ of seg_box/pas-cpg512_pad10 (boxes scaled by 512).
explore_labels(dataset=Path('../data/seg_box/pas-cpg512_pad10'), patch_size=512)

Cell number: 56752 = 100.0% (/56752)
Average Lymphocyte Dimensions (wxh): 37.0 x 37.6
Average Monocyte Dimensions (wxh): 45.9 x 46.6


In [34]:
# Label statistics over every patch in images/ of seg_box/pas-cpg256_pad10 (boxes scaled by 256).
explore_labels(dataset=Path('../data/seg_box/pas-cpg256_pad10'), patch_size=256)

Cell number: 72665 = 100.0% (/72665)
Average Lymphocyte Dimensions (wxh): 35.7 x 36.1
Average Monocyte Dimensions (wxh): 40.1 x 40.8


In [35]:
# Label statistics over every patch in images/ of seg_box/pas-cpg128_pad10 (boxes scaled by 128).
explore_labels(dataset=Path('../data/seg_box/pas-cpg128_pad10'), patch_size=128)

Cell number: 81717 = 100.0% (/81717)
Average Lymphocyte Dimensions (wxh): 32.9 x 33.1
Average Monocyte Dimensions (wxh): 36.0 x 36.4
