In [None]:
import json
import pandas as pd
import numpy as np
from pathlib import Path

import os
import glob
import math

import rasterio.features
from shapely.geometry import shape
from shapely.geometry import Polygon, Point

import seaborn as sns
from matplotlib import pyplot as plt

from scipy import stats
from scipy.stats import mannwhitneyu, normaltest
from statannotations.Annotator import Annotator

from PIL import Image

In [None]:
def get_log_ax(orient="v"):
    """
    Create a single-Axes figure whose value axis is log-scaled.

    Parameters
    ----------
    orient : str
        ``"v"`` gives a 12 x 6 figure with a logarithmic y-axis; any other
        value gives a 10 x 8 figure with a logarithmic x-axis.

    Returns
    -------
    The Axes only; the figure is not returned.
    """
    vertical = orient == "v"
    fig, ax = plt.subplots(1, 1, figsize=(12, 6) if vertical else (10, 8))
    # Make the figure background fully opaque.
    fig.patch.set_alpha(1)
    if vertical:
        ax.set_yscale("log")
    else:
        ax.set_xscale("log")
    return ax

def get_ax(figsize = (10, 10)):
    """Create a figure containing one Axes and return them as ``(fig, ax)``."""
    return plt.subplots(1, 1, figsize=figsize)

from matplotlib.patches import PathPatch

def adjust_box_widths(g, fac):
    """
    Adjust the widths of a seaborn-generated boxplot.

    Every ``PathPatch`` found among the children of each Axes in ``g.axes``
    has its left and right edges moved so that its width is scaled by
    ``fac`` about its horizontal centre. Any line in ``ax.lines`` whose
    x-data is exactly the box's previous ``[xmin, xmax]`` (the median line)
    is moved to the new edges. All artists are modified in place.

    Parameters
    ----------
    g : object exposing an ``axes`` iterable (e.g. a matplotlib Figure)
    fac : float
        Width scale factor; ``1`` leaves the boxes unchanged.
    """
    for ax in g.axes:
        for child in ax.get_children():
            if not isinstance(child, PathPatch):
                continue

            # View on all vertices but the last one (presumably the path's
            # closing vertex, which is never moved); writing to the view
            # edits the patch's path in place.
            verts_sub = child.get_path().vertices[:-1]
            if len(verts_sub) == 0:
                continue

            # current horizontal extent of the box
            xmin = np.min(verts_sub[:, 0])
            xmax = np.max(verts_sub[:, 0])
            xmid = 0.5 * (xmin + xmax)
            xhalf = 0.5 * (xmax - xmin)

            # new horizontal extent of the box
            xmin_new = xmid - fac * xhalf
            xmax_new = xmid + fac * xhalf
            verts_sub[verts_sub[:, 0] == xmin, 0] = xmin_new
            verts_sub[verts_sub[:, 0] == xmax, 0] = xmax_new

            # Move the median line along with the box edges. array_equal is
            # used instead of ``==`` because other lines on the axes (fliers,
            # annotation brackets) can hold a different number of points, and
            # comparing mismatched shapes with ``==`` raises on NumPy >= 1.25.
            for line in ax.lines:
                if np.array_equal(line.get_xdata(), [xmin, xmax]):
                    line.set_xdata([xmin_new, xmax_new])


In [None]:
# Maps the integer class id found in a mask to its tissue name. The position
# in the tuple is the class id; ids 0 and 6 intentionally share the name
# 'Other'.
TISSUE_TYPES = dict(enumerate((
    'Other',   # 0
    'Tumor',   # 1
    'Stroma',  # 2
    'Fat',     # 3
    'Normal',  # 4
    'Vessel',  # 5
    'Other',   # 6
)))

In [None]:
# Count, for every mask tile, how many pixels belong to each tissue type.
mask_pattern = "topk_results_example_sample/10x_masks/*.png"

# Sorted so the row order does not depend on the filesystem's listing order.
files = sorted(glob.glob(mask_pattern))
if not files:
    raise FileNotFoundError(f"No mask tiles match {mask_pattern!r}")

# Unique tissue names in first-seen order (class ids 0 and 6 are both 'Other').
tissue_names = list(dict.fromkeys(TISSUE_TYPES.values()))

# One record per tile; the frame is built once at the end instead of being
# grown with pd.concat inside the loop.
records = []
for f in files:
    with Image.open(f) as mask_image:
        mask = np.array(mask_image)
    class_ids, pixel_counts = np.unique(mask, return_counts=True)

    # Accumulate by tissue name so that both 'Other' class ids end up in a
    # single column.
    counts = dict.fromkeys(tissue_names, 0)
    for class_id, n_pixels in zip(class_ids.tolist(), pixel_counts.tolist()):
        if class_id not in TISSUE_TYPES:
            raise ValueError(f"{f}: unexpected class id {class_id!r} in mask")
        counts[TISSUE_TYPES[class_id]] += n_pixels

    file_name = Path(f).name
    counts['file'] = file_name
    # The third '_'-separated token of the file name is used as the attention
    # group (presumably 'low' / 'high' — verify against the file naming scheme).
    counts['Attention'] = file_name.split('_')[2].capitalize()
    records.append(counts)

# Same column layout as before: specific tissue types, metadata, then 'Other'.
column_order = [name for name in tissue_names if name != 'Other'] + ['file', 'Attention', 'Other']
df = pd.DataFrame(records)[column_order]
df.head()

In [None]:
# Convert per-class pixel counts into the fraction of a tile they cover.
# Assumes every mask tile is 384 x 384 px — TODO confirm against the masks.
TILE_PIXELS = 384 * 384

# Unique tissue names (class ids 0 and 6 share the name 'Other').
tissue_columns = list(dict.fromkeys(TISSUE_TYPES.values()))

# `df` is modified in place, so without this flag re-running the cell would
# divide the already-normalised values a second time. The flag lives on the
# frame itself and therefore resets whenever `df` is rebuilt from the masks.
if not df.attrs.get('tissue_fractions', False):
    for tt in tissue_columns:
        df[tt] = df[tt] / TILE_PIXELS
    df.attrs['tissue_fractions'] = True

df.head()

In [None]:
# Box plot of tissue composition per attention group, with a per-tissue
# Low-vs-High significance annotation.

# Unique tissue names in plotting order (class ids 0 and 6 share 'Other').
tissue_order = list(dict.fromkeys(TISSUE_TYPES.values()))
attention_order = ['Low', 'High']

# Long format: one row per (tile, tissue type).
_df = df.melt(
    id_vars=['Attention'],
    value_vars=tissue_order,
    var_name='Tissue Type',
    value_name='Percentage'
)
# `df` holds pixel counts divided by 384 * 384, i.e. fractions; scale them so
# the values agree with the 'Percentage' axis label. The t-test below is
# unaffected by this rescaling.
_df['Percentage'] = _df['Percentage'] * 100

# Arguments shared by the plot and the annotator; they must be identical so
# the annotations line up with the boxes.
plot_kwargs = dict(
    data=_df,
    x='Tissue Type',
    y='Percentage',
    hue='Attention',
    order=tissue_order,
    hue_order=attention_order,
)

fig, ax = plt.subplots(1, 1, figsize=(6, 5))
sns.boxplot(palette='Set2', ax=ax, **plot_kwargs)

# Compare Low vs High attention within every tissue type.
pairs = [[(tissue, 'Low'), (tissue, 'High')] for tissue in tissue_order]

annotator = Annotator(ax, pairs, verbose=False, **plot_kwargs)
annotator.configure(test='t-test_ind', text_format='star', loc='inside')
annotator.apply_and_annotate()

adjust_box_widths(fig, 0.9)
ax.xaxis.labelpad = 10

plt.tight_layout()

# fig.savefig("tissue_boxplots.png", dpi=300)
plt.show()