In [1]:
import json
import pandas as pd
from pathlib import Path

In [2]:
# Count the annotated lymphocytes and monocytes over all whole-slide images (WSIs).
IMAGE_DIR = Path('../data/raw/images/pas-cpg')
ANNOT_DIR = Path('../data/raw/annotations/json_pixel')


def _count_points(annot_path: Path) -> int:
    """Return the number of entries stored under the 'points' key of a JSON file."""
    with annot_path.open() as f:
        return len(json.load(f)['points'])


# The first two '_'-separated tokens of an image file name form the prefix of its
# annotation files. Collecting the prefixes in a set counts every slide exactly once
# even if several image files share a prefix; entries that are not regular files
# (e.g. checkpoint directories) are ignored instead of failing on a missing JSON.
wsi_names = sorted({
    '_'.join(wsi.name.split('_')[:2])
    for wsi in IMAGE_DIR.iterdir()
    if wsi.is_file()
})

lymph_total = sum(_count_points(ANNOT_DIR / (name + '_lymphocytes.json')) for name in wsi_names)
mono_total = sum(_count_points(ANNOT_DIR / (name + '_monocytes.json')) for name in wsi_names)

# cells_total is read later by explore_patches() as the reference cell count.
cells_total = lymph_total + mono_total
print(f'Cell number: {cells_total}')
print(f'Lymphocyte number: {lymph_total}')
print(f'Monocyte number: {mono_total}')

Cell number: 90367
Lymphocyte number: 59101
Monocyte number: 31266


In [3]:
def explore_patches(patch_dir: Path, total_cells: 'int | None' = None):
    """Print per-patch cell-count statistics for one patch dataset.

    Every regular file in ``patch_dir / 'annotations'`` is read as text with one
    annotation per line; the first whitespace-separated token of a line is parsed
    as an integer class label. Label 0 is tallied as a lymphocyte and label 1 as a
    monocyte; any other label still counts towards the patch's total cell count.
    Blank lines and non-file directory entries are skipped.

    Args:
        patch_dir: Dataset directory that contains an ``annotations`` sub-directory.
        total_cells: Reference cell count used to report how many cells are missing
            from the patches. When ``None`` (the default) the notebook-level global
            ``cells_total`` is used, which keeps existing calls working unchanged.

    Returns:
        None. The patch count, total cell count, omitted cell count and the
        ``DataFrame.describe()`` summary of the per-patch counts are printed.

    Raises:
        NameError: If ``total_cells`` is ``None`` and ``cells_total`` is undefined.
        ValueError: If the first token of a non-blank line is not an integer.
    """
    # Resolve the reference count up front so a missing global fails before the
    # (potentially slow) directory scan rather than after it.
    if total_cells is None:
        total_cells = cells_total

    columns = ['cells', 'lymphocytes', 'monocytes']
    cells = []

    for patch_path in patch_dir.joinpath('annotations').iterdir():
        if not patch_path.is_file():
            continue  # e.g. a stray sub-directory; open() would raise on it

        patch = dict.fromkeys(columns, 0)
        with open(patch_path) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    continue  # blank line carries no annotation
                label = int(fields[0])
                patch['cells'] += 1
                if label == 0:
                    patch['lymphocytes'] += 1
                elif label == 1:
                    patch['monocytes'] += 1
        cells.append(patch)

    # Fixed columns keep the frame well-formed even when no annotation file exists.
    cells = pd.DataFrame(cells, columns=columns).astype('int64')
    print(f'Patch number: {cells.shape[0]}')
    print(f'Cell number: {cells.cells.sum()}')
    print(f'Omitted cell numer: {total_cells - cells.cells.sum()}')
    print(cells.describe())

In [4]:
# Print patch/cell statistics for the annotations under ../data/basic_box/pas-cpg512.
# NOTE(review): the '512' suffix presumably denotes the patch size in pixels — confirm.
explore_patches(Path('../data/basic_box/pas-cpg512'))

Patch number: 2162
Cell number: 56752
Omitted cell numer: 33615
             cells  lymphocytes    monocytes
count  2162.000000  2162.000000  2162.000000
mean     26.249769    16.881591     9.368178
std      29.633416    23.970440    10.131960
min       0.000000     0.000000     0.000000
25%       5.000000     2.000000     2.000000
50%      16.000000     7.000000     6.000000
75%      37.000000    22.000000    13.000000
max     245.000000   216.000000    81.000000


In [5]:
# Print patch/cell statistics for the annotations under ../data/basic_box/pas-cpg256.
# NOTE(review): the '256' suffix presumably denotes the patch size in pixels — confirm.
explore_patches(Path('../data/basic_box/pas-cpg256'))

Patch number: 10756
Cell number: 72665
Omitted cell numer: 17702
              cells   lymphocytes     monocytes
count  10756.000000  10756.000000  10756.000000
mean       6.755764      4.381647      2.374117
std        8.828694      7.284740      3.110316
min        0.000000      0.000000      0.000000
25%        1.000000      0.000000      0.000000
50%        3.000000      1.000000      1.000000
75%        9.000000      5.000000      3.000000
max       91.000000     91.000000     28.000000


In [6]:
# Print patch/cell statistics for the annotations under ../data/basic_box/pas-cpg128.
# NOTE(review): the '128' suffix presumably denotes the patch size in pixels — confirm.
explore_patches(Path('../data/basic_box/pas-cpg128'))

Patch number: 47931
Cell number: 81717
Omitted cell numer: 8650
              cells   lymphocytes     monocytes
count  47931.000000  47931.000000  47931.000000
mean       1.704888      1.110284      0.594605
std        2.691048      2.229064      1.071157
min        0.000000      0.000000      0.000000
25%        0.000000      0.000000      0.000000
50%        1.000000      0.000000      0.000000
75%        2.000000      1.000000      1.000000
max       31.000000     30.000000     10.000000
