In [None]:
import json
import pandas as pd
import numpy as np

import os
import glob
import math

import rasterio.features
from shapely.geometry import shape
from shapely.geometry import Polygon, Point

import seaborn as sns
from matplotlib import pyplot as plt

from scipy import stats
from scipy.stats import mannwhitneyu, normaltest
from statannotations.Annotator import Annotator

In [None]:
# Shared colour palette: the first two colours of the palette are picked
# out as the "low" and "high" colours.
palette = 'Set2'
pal = sns.color_palette(palette)
low_color, high_color = pal.as_hex()[:2]

# Show the two hex codes, then render the whole palette as the cell output.
print(low_color, high_color, sep="\n")
pal

In [None]:
def get_log_ax(orient="v"):
    """Create a one-panel figure whose value axis uses a log scale.

    Parameters
    ----------
    orient : str
        "v" gives a 12x6 figure with a logarithmic y axis; any other value
        gives a 10x8 figure with a logarithmic x axis.

    Returns
    -------
    The matplotlib Axes only (the Figure is not returned).
    """
    vertical = orient == "v"
    figure, axis = plt.subplots(1, 1, figsize=(12, 6) if vertical else (10, 8))
    # Opaque figure background.
    figure.patch.set_alpha(1)
    if vertical:
        axis.set_yscale("log")
    else:
        axis.set_xscale("log")
    return axis

def get_ax(figsize = (10, 10)):
    """Create a one-panel figure of the given size and return ``(fig, ax)``."""
    figure, axis = plt.subplots(nrows=1, ncols=1, figsize=figsize)
    return figure, axis

from matplotlib.patches import PathPatch

def adjust_box_widths(g, fac):
    """
    Adjust the widths of a seaborn-generated boxplot.

    Every ``PathPatch`` found among the children of each Axes in ``g.axes``
    is rescaled horizontally around its own centre by the factor ``fac``.
    Any line in ``ax.lines`` whose x data is exactly ``[xmin, xmax]`` of a
    box (i.e. the median line) is rescaled to the new box edges as well.

    Parameters
    ----------
    g : object exposing an iterable ``axes`` attribute (e.g. a Figure).
    fac : float
        Width multiplier; 1 keeps the width, values below 1 narrow the boxes.

    The patch vertices and line data are modified in place; nothing is
    returned.
    """

    # iterating through Axes instances
    for ax in g.axes:

        # iterating through axes artists:
        for c in ax.get_children():

            # only PathPatches are treated as boxes
            if not isinstance(c, PathPatch):
                continue

            # getting current width of box; the slice is a view, so the
            # assignments below change the patch itself. The last vertex
            # is left out of the computation and is not modified.
            verts_sub = c.get_path().vertices[:-1]
            xmin = np.min(verts_sub[:, 0])
            xmax = np.max(verts_sub[:, 0])
            xmid = 0.5*(xmin+xmax)
            xhalf = 0.5*(xmax - xmin)

            # setting new width of box
            xmin_new = xmid-fac*xhalf
            xmax_new = xmid+fac*xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # setting new width of median line
            for l in ax.lines:
                # array_equal is False (instead of raising) when the line
                # does not have exactly two points, e.g. outlier markers or
                # multi-point annotation brackets; an elementwise `==`
                # against a 2-element list fails on such shapes.
                if np.array_equal(l.get_xdata(), [xmin, xmax]):
                    l.set_xdata([xmin_new, xmax_new])


In [None]:
def get_polygon_shape(poly):
    """Return ``(length, width)`` of a polygon's minimum rotated bounding box.

    Parameters
    ----------
    poly : geometry exposing ``minimum_rotated_rectangle`` (e.g. a shapely
        Polygon).

    Returns
    -------
    (length, width) : tuple of float
        The longer and the shorter edge of the minimum rotated rectangle.
        For degenerate input whose bounding "rectangle" collapses to a line
        or a point, the width is 0 and the length is the length of that
        degenerate geometry. Empty geometry yields ``(0.0, 0.0)``.
    """
    # get minimum bounding box around polygon
    box = poly.minimum_rotated_rectangle

    # nothing to measure
    if box.is_empty:
        return 0.0, 0.0

    # shapely returns the degenerate hull itself (LineString / Point) when
    # the input has no area; such geometries have no exterior ring
    if box.geom_type != "Polygon":
        return float(box.length), 0.0

    # get coordinates of bounding box vertices
    x, y = box.exterior.coords.xy

    # two adjacent edges of the rectangle fully describe its size
    edge_a = math.hypot(x[1] - x[0], y[1] - y[0])
    edge_b = math.hypot(x[2] - x[1], y[2] - y[1])

    # length is the longest edge, width the shortest
    return max(edge_a, edge_b), min(edge_a, edge_b)

In [None]:
# Maps a nucleus type id (as a string) to [short class code, 3-int colour].
# The colour triplets look like 0-255 RGB values -- TODO confirm; they are
# not used by the cells visible in this notebook.
# Type "0" is deliberately left out: the loading cell skips any nucleus
# whose type id is not a key of this dict.
CELL_TYPES = {
    # "0" : ["nolabe", [0  ,   0,   0]], 
    "1" : ["neopla", [255,   0,   0]], 
    "2" : ["inflam", [0  , 255,   0]], 
    "3" : ["connec", [0  ,   0, 255]], 
    "4" : ["necros", [255, 255,   0]], 
    "5" : ["no-neo", [255, 165,   0]] 
}

# Maps the short class codes above to the display names used in the
# DataFrame's 'class' column and on plot axes.
# NOTE(review): "No-Neoplastic" is presumably meant to read "Non-Neoplastic"
# -- confirm before changing, the string is used as a category label.
CELL_NAMES = {
    # "nolabe": "Background",
    "neopla": "Neoplastic",
    "inflam": "Inflammatory",
    "connec": "Connective",
    "necros": "Necrotic",
    "no-neo": "No-Neoplastic"
}

In [None]:
# Build one row per segmented nucleus from the per-patch JSON files.
# Each JSON file is expected to hold a 'nuc' mapping of cell id -> dict with
# at least a 'contour' (list of vertices) and a 'type' entry.
folders = {
    'high_att': "topk_results_example_sample/20x_hovernet/json/high_att/*",
    'low_att': "topk_results_example_sample/20x_hovernet/json/low_att/*",
}

# Rows are collected in a plain list and turned into a DataFrame once;
# concatenating a one-row frame per cell is quadratic and leaves every row
# with the same index label (0).
records = []

for att_type, pattern in folders.items():
    # glob returns files in arbitrary, filesystem-dependent order; sorting
    # makes the patch index (and therefore the ids) reproducible.
    # NOTE: idx restarts for each attention type, so 'id' is only unique in
    # combination with 'att_type'.
    for idx, f in enumerate(sorted(glob.glob(pattern))):
        with open(f, 'r') as jf:
            patch = json.load(jf)

        for cell_id, cell in patch['nuc'].items():
            # Skip cells whose type is missing or not listed in CELL_TYPES
            # (e.g. the unlabeled type 0) before building any geometry.
            type_entry = CELL_TYPES.get(str(cell.get('type')))
            if type_entry is None:
                continue

            records.append({
                'id': f'patch_{idx}__cell_{cell_id}',
                'area': Polygon(cell['contour']).area,
                'class': type_entry[0],
                'att_type': att_type,
            })

# The previous implementation prepended each new row, so the newest row came
# first. That order is kept (records reversed) so that category order in
# downstream plots does not change. The explicit column list also yields a
# usable empty frame when no files match.
df = pd.DataFrame(records[::-1], columns=['id', 'area', 'class', 'att_type'])

# Replace internal codes with human-readable labels.
df['class'] = df['class'].replace(CELL_NAMES)
df['att_type'] = df['att_type'].replace({
    'low_att': 'Low Attention',
    'high_att': 'High Attention',
})

In [None]:
# Display the per-cell table (columns: id, area, class, att_type).
df

In [None]:
def draw_boxplot(df, x, y, hue, ax, palette='muted'):
    """Draw a box plot on ``ax`` and annotate Low vs High Attention pairs.

    With ``hue=None`` the x axis holds the two attention levels and they are
    compared directly. With a hue, the x axis holds the cell classes (fixed
    order) and, for every class in ``CELL_NAMES``, the Low and High Attention
    boxes are compared.

    Annotations are significance stars from statannotations'
    ``'t-test_ind'`` test, drawn inside the axes.
    """
    attention_levels = ['Low Attention', 'High Attention']

    if hue is None:
        # attention level is the x variable itself
        order = attention_levels
        hue_order = None
        pairs = [attention_levels]
    else:
        # fixed left-to-right order of the cell classes
        order = [
            CELL_NAMES[code]
            for code in ('necros', 'inflam', 'connec', 'neopla', 'no-neo')
        ]
        hue_order = attention_levels
        # one (class, low) vs (class, high) comparison per cell class
        pairs = [
            [(label, 'Low Attention'), (label, 'High Attention')]
            for label in CELL_NAMES.values()
        ]

    # arguments shared by the plot and the annotator so both agree on layout
    layout = dict(data=df, x=x, y=y, hue=hue, order=order, hue_order=hue_order)

    sns.boxplot(ax=ax, palette=palette, **layout)

    annotator = Annotator(ax, pairs, verbose=False, **layout)
    annotator.configure(test='t-test_ind', text_format='star', loc='inside')
    annotator.apply_and_annotate()

In [None]:
# Display the per-cell table again before computing summary statistics.
df

In [None]:
from scipy.stats import ttest_ind

# Cell areas split by attention group, as plain Python lists.
low_att_area = df.loc[df['att_type'] == 'Low Attention', 'area'].tolist()
high_att_area = df.loc[df['att_type'] == 'High Attention', 'area'].tolist()

# Mean and standard deviation (numpy default, ddof=0) per group,
# high attention first.
for areas in (high_att_area, low_att_area):
    print(np.mean(areas))
    print(np.std(areas))

# Independent two-sample t-test on the areas; shown as the cell output.
ttest_ind(high_att_area, low_att_area)

In [None]:
# Number of cells per class within each attention group.
low_att_cell_counts = df.loc[df['att_type'] == 'Low Attention', 'class'].value_counts()
high_att_cell_counts = df.loc[df['att_type'] == 'High Attention', 'class'].value_counts()

# Class proportions (counts divided by the group total), low attention first.
for counts in (low_att_cell_counts, high_att_cell_counts):
    print(counts / counts.sum())
# from scipy.stats import ttest_ind

# print(np.mean(high_att_area))
# print(np.std(high_att_area))
# print(np.mean(low_att_area))
# print(np.std(low_att_area))
# ttest_ind(high_att_area, low_att_area)

In [None]:
import string

palette = 'Set2'

# Two panels sharing the y axis: attention groups overall (narrow, left)
# and attention groups per cell class (wide, right).
fig, axes = plt.subplots(
    1, 2,
    figsize=(10,8),
    sharey=True,
    gridspec_kw={'width_ratios': [1, 2]},
)

draw_boxplot(df, x='att_type', y='area', hue=None, ax=axes[0], palette=palette)
draw_boxplot(df, x='class', y='area', hue='att_type', ax=axes[1], palette=palette)

# Use panel letters (a, b, ...) as the x labels.
for letter, ax in zip(string.ascii_lowercase, axes.flatten()):
    ax.xaxis.labelpad = 10
    ax.set_xlabel(letter)

# Only the left panel carries the y label; the right one keeps the legend.
axes[1].set_ylabel(None)
axes[0].set_ylabel('Area')
axes[1].legend(title=None)

# Slanted tick labels on both panels.
for ax in axes:
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# Narrow the boxes slightly.
adjust_box_widths(fig, 0.9)

# fig.savefig("cell_stats.png", dpi=300)

In [None]:
# Histogram of the number of cells per class, split by attention group.
fig, ax = plt.subplots(1, 1, figsize=(6,5))

# The column is renamed only so that the legend title reads "Attention".
sns.histplot(data=df.rename(columns={"att_type": "Attention"}), x='class', hue='Attention', palette=palette, ax=ax)

# The bars show how many cells fall in each class, not cell area, so the
# y axis is labelled as a count.
ax.set_ylabel('Count')
ax.set_xlabel(None)
ax.set_xticklabels(ax.get_xticklabels(), rotation=20, ha='right')
# ax.legend(title=None)
# fig.savefig("cell_histogram.png", dpi=300)