In [None]:
# microneighborhood examples from old rep (RA nlatent=20, training pass 1): patchids = [11, 601, 1091, 9200, 1500, 1051, 451]

In [None]:
# plot microclusters as umap + examples
def to_polar(center, point):
    """Express `point` in polar coordinates relative to `center`.

    Parameters
    ----------
    center, point : indexable
        Only elements 0 (x) and 1 (y) of each are read.

    Returns
    -------
    (radius, angle) : tuple
        Euclidean distance from `center` to `point`, and the angle of the
        offset vector in radians as given by `np.arctan2` (range [-pi, pi]).
    """
    origin_x, origin_y = center[0], center[1]
    target_x, target_y = point[0], point[1]
    offset_x = target_x - origin_x
    offset_y = target_y - origin_y
    radius = np.sqrt(offset_x**2 + offset_y**2)
    angle = np.arctan2(offset_y, offset_x)  # radians
    return radius, angle

from scipy.spatial.distance import mahalanobis
def remove_outliers(points, threshold=3):
    """Keep only the points that lie close to the bulk of `points`.

    Each point's Mahalanobis distance is measured from the coordinate-wise
    median of `points`, using the sample covariance of `points`. Points whose
    distance is strictly below `threshold` are kept, in their original order.

    Parameters
    ----------
    points : array-like, shape (n_points, n_dims)
        One observation per row.
    threshold : float, default 3
        Maximum (exclusive) Mahalanobis distance for a point to be kept.

    Returns
    -------
    numpy.ndarray
        The rows of `points` that pass the distance test. When fewer than two
        points are given no covariance can be estimated, so the input is
        returned unfiltered.
    """
    points = np.asarray(points)
    if points.shape[0] < 2:
        return points

    # The median (not the mean) is the reference point so that the outliers
    # being removed do not drag the centre towards themselves.
    center = np.median(points, axis=0)
    cov_matrix = np.atleast_2d(np.cov(points, rowvar=False))
    # Pseudo-inverse instead of inverse: collinear or identical points give a
    # singular covariance, on which np.linalg.inv raises LinAlgError. For a
    # well-conditioned matrix the two coincide.
    inv_cov_matrix = np.linalg.pinv(cov_matrix, rcond=1e-10)

    # Quadratic form offset^T . inv_cov . offset for every row at once.
    offsets = points - center
    squared = np.einsum('ij,jk,ik->i', offsets, inv_cov_matrix, offsets)
    # Rounding can push a zero distance marginally below zero; clip before sqrt.
    distances = np.sqrt(np.clip(squared, 0, None))
    return points[distances < threshold]

from matplotlib.patches import Ellipse
def draw_ellipse_around_points(points, ax, factor=2):
    """Outline a point cloud with an ellipse aligned to its principal axes.

    The ellipse is centred on the mean of `points`; its full axis lengths are
    `2 * factor` standard deviations along each principal direction of the
    sample covariance. It is drawn unfilled with a black edge.

    Parameters
    ----------
    points : array-like, shape (n_points, 2)
        Coordinates to enclose.
    ax : matplotlib Axes
        Axes the ellipse patch is added to.
    factor : float, default 2
        Number of standard deviations covered by each semi-axis.

    Returns
    -------
    matplotlib.patches.Ellipse or None
        The patch that was added, or None when fewer than two points are
        given (no covariance can be estimated, so nothing is drawn).
    """
    points = np.asarray(points, dtype=float)
    if points.shape[0] < 2:
        return None

    center = np.mean(points, axis=0)
    cov_matrix = np.cov(points - center, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # eigh returns the eigenvectors as COLUMNS: eigenvectors[:, i] belongs to
    # eigenvalues[i]. `width` below is built from eigenvalues[0], so the
    # rotation must be the direction of column 0. Reading row 0 instead gives
    # the right angle only for one of the two possible sign conventions and a
    # mirrored ellipse for the other.
    axis_x, axis_y = eigenvectors[:, 0]
    angle = np.degrees(np.arctan2(axis_y, axis_x))

    # Rounding can produce a marginally negative eigenvalue for degenerate
    # clouds; clip so the square root stays finite.
    width, height = 2 * np.sqrt(np.clip(eigenvalues, 0, None)) * factor
    ellipse = Ellipse(xy=center, width=width, height=height, angle=angle, edgecolor='black', facecolor='none', lw=2)
    ax.add_patch(ellipse)
    return ellipse

key = 'leiden3'
# Summarise every cluster in d.obs[key] by the median UMAP position of its members, then
# express those centroids in polar coordinates around the mean of all centroids.
#
# The embedding array is masked directly (one boolean mask per cluster) instead of slicing
# the AnnData object twice per cluster, and the table is built in a single constructor call
# so that x/y are float columns rather than being grown row by row from an empty frame.
umap_xy = np.asarray(d.obsm['X_umap'])[:, :2]
cluster_ids = list(d.obs[key].unique())
cluster_medians = [
    np.median(umap_xy[(d.obs[key] == cluster_id).to_numpy()], axis=0)
    for cluster_id in cluster_ids
]
clustcoords = pd.DataFrame(
    np.reshape(cluster_medians, (-1, 2)),
    index=cluster_ids,
    columns=['x', 'y'],
    dtype=float,
)

# Polar coordinates of each centroid relative to the mean centroid.
center = np.mean(clustcoords[['x','y']].values, axis=0)
polar_coords = [to_polar(center, point) for point in clustcoords[['x','y']].values]
clustcoords['r'] = [radius for radius, _ in polar_coords]
clustcoords['theta'] = [angle for _, angle in polar_coords]
# Radius rounded down to a multiple of 2, i.e. concentric rings of width 2 in UMAP units.
clustcoords['r_discrete'] = ((clustcoords.r // 2)*2)

%%capture
# make reports for each cluster
outdir = '_results/RA/allpatchclusters'
cmap = fibro_cmap

plt.figure(figsize=(8,6))
sc.pl.umap(d, s=30, show=False, ax=plt.gca())
for s in plt.gca().spines.values():
    s.set_visible(False)
plt.xlabel('UMAP 1', fontsize=14)
plt.ylabel('UMAP 2', fontsize=14)
plt.savefig(f'{outdir}/umap.png')
plt.close()

for i, c in enumerate(clustcoords.sort_values(by=['r_discrete','theta'], ascending=False).index):
    size = d.obs[key].value_counts()[c]
    print(c, size, end='|')
    if size < 50:
        continue
    d.obs['inclust'] = d.obs[key] == c

    # plot UMAP
    plt.figure(figsize=(8,6))
    sc.pl.umap(d, s=30, show=False, ax=plt.gca())
    draw_ellipse_around_points(remove_outliers(d[d.obs.inclust].obsm['X_umap'], threshold=1), plt.gca(), factor=2.2)
    for s in plt.gca().spines.values():
        s.set_visible(False)
    plt.xlabel('UMAP 1', fontsize=14)
    plt.ylabel('UMAP 2', fontsize=14)
    plt.savefig(f'{outdir}/umap_{i}_{c}_{size}.png')
    plt.close()

    # plot example patches
    np.random.seed(0)
    idx = np.arange(len(d))[d.obs.inclust.values]
    tv.plot_patches_overlaychannels_som(
        Mdense[idx][0],
        d.X[idx],
        cmap,
        nx=8, ny=8,
        show=False,
        scale_factor=1
    )
    plt.gcf().patch.set_facecolor('black')
    plt.savefig(f'{outdir}/examples_{i}_{c}_{size}.png')
    plt.close()

    # paste together the reports
    image1 = Image.open(f'{outdir}/umap_{i}_{c}_{size}.png')
    image2 = Image.open(f'{outdir}/examples_{i}_{c}_{size}.png')
    combined_width = image1.width + image2.width
    combined_height = max(image1.height, image2.height)
    combined_image = Image.new('RGB', (combined_width, combined_height), 'white')
    combined_image.paste(image1, (0, int((combined_height-image1.height)/2)))
    combined_image.paste(image2, (image1.width, 0))
    combined_image.save(f'{outdir}/combined_{i}_{c}_{size}.png')

In [None]:
def make_pics(clusts, clust_pics):
    """Render one example-patch picture per cluster into in-memory PNG buffers.

    For every label in `clusts`, the rows of the notebook-level AnnData `d`
    whose `d.obs[key]` equals that label are plotted with
    `tv.plot_patches_overlaychannels_linsum` on a black figure background.
    The figure is saved as a PNG (tight bounding box, 800 dpi) into a
    `BytesIO` that is stored under the label in `clust_pics` and rewound to
    position 0 so it can be read immediately.

    All warnings raised while plotting are suppressed.

    Parameters
    ----------
    clusts : iterable
        Labels to compare against `d.obs[key]`.
    clust_pics : dict
        Filled in place: label -> BytesIO holding the PNG.

    Relies on the notebook globals `d`, `key`, `Mdense`, `cmap`, `nx`, `ny`.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for label in clusts:
            # positional indices of the rows that carry this label
            member_mask = np.asarray(d.obs[key] == label)
            member_rows = np.flatnonzero(member_mask)
            tv.plot_patches_overlaychannels_linsum(
                Mdense[member_rows][0],
                d.X[member_rows],
                cmap,
                nx=nx, ny=ny,
                seed=0,
                show=False
            )
            plt.gcf().patch.set_facecolor('black')
            png_buffer = BytesIO()
            clust_pics[label] = png_buffer
            plt.savefig(png_buffer, format="png", bbox_inches="tight", dpi=800)
            png_buffer.seek(0)  # rewind so the image can be opened straight away
            plt.close()

from PIL import Image
def plot_clust(c, x, y, label):
    """Place the stored picture of cluster `c` as an inset on the current figure.

    An axes of size `insetsize` x `ar * insetsize` is added to the
    notebook-level `fig` with its lower-left corner at (`x`, `y`), the image
    stored in `clust_pics[c]` is shown in it, and its axis decorations are
    turned off. Before that, `line` is called with the cluster's centroid from
    `clustcoords`, the `umap` object, the centre of the inset and `fig`
    (presumably to draw a connector between the two — TODO confirm).

    Parameters
    ----------
    c : hashable
        Cluster label; must be a row of `clustcoords` and a key of `clust_pics`.
    x, y : float
        Lower-left corner of the inset, passed to `fig.add_axes`.
    label :
        Currently unused.
    """
    inset_width = insetsize
    inset_height = ar*insetsize
    centroid = clustcoords.loc[c]
    line(centroid.x, centroid.y, umap, x+inset_width/2, y+inset_height/2, fig)
    inset_ax = fig.add_axes([x, y, inset_width, inset_height], zorder=1000)
    picture = Image.open(clust_pics[c])
    inset_ax.imshow(picture)
    inset_ax.axis('off')

In [None]:
# choosing microneighborhoods to show
# Candidates are the 50 patches whose rows of d.X have the smallest squared Euclidean distance
# to the row of patch 9200 (patch 9200 itself has distance 0 and is therefore among them).
# assumes d.X is a dense array that supports broadcasting subtraction — TODO confirm
patchids = np.argsort(((d.X - d.X[9200])**2).sum(axis=1))[:50]
for pid in patchids:
    # NOTE(review): the make_pics defined in this notebook selects rows with d.obs[key] == c,
    # i.e. it expects cluster labels, but here it receives a patch index — confirm that the
    # intended make_pics / `key` is in scope when this cell runs.
    mn_pics = {}
    make_pics([pid], mn_pics)
    # left panel: the rendered picture for this patch id; right panel: UMAP
    fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(4,2))
    axs[0].imshow(Image.open(mn_pics[pid]))
    axs[0].set_title(pid)
    # all patches as background ...
    sc.pl.umap(d, ax=axs[1], s=5, show=False)
    # ... overlaid with the patches that have a positive value in obs column mn_<pid>,
    # coloured by that value
    sc.pl.umap(d[d.obs[f'mn_{pid}'] > 0], color=f'mn_{pid}', s=15,
               colorbar_loc=None, title='',
               cmap='Reds', ax=axs[1], frameon=False, show=False)
    plt.show()

In [None]:
# plot patch reconstructions for different patches
# The patches are drawn from a dedicated, seeded generator so that the same ones are shown on
# every run regardless of what earlier cells did to NumPy's global random state, and the sample
# size is capped at the number of available patches (sampling without replacement would
# otherwise raise).
n_examples = min(50, len(d))
for pid in np.random.default_rng(0).choice(len(d), size=n_examples, replace=False):
    print(pid)
    fig = plt.figure(figsize=(10, 5))
    # NOTE(review): the meaning of the positional arguments (0, 0.5, 0.5, 0.5) is defined by
    # plot_patchreconstructions, which is not visible here.
    plot_patchreconstructions(pid, 0, 0.5, 0.5, 0.5)
    plt.show()
    plt.close(fig)  # release the figure; this loop creates one per patch

In [None]:
# plot marker differences between different sets of patches
from scipy.stats import ttest_ind
# Per-patch, per-marker summary statistics over the two axes 1 and 2 of the patch stack.
# The stack is materialised once and reused for both tables.
# assumes Mdense[:][0] has shape (n_patches, axis1, axis2, n_markers) — TODO confirm
patch_stack = Mdense[:][0]
avgs = pd.DataFrame(patch_stack.mean(axis=(1,2)), index=d.obs.index, columns=markers)
stds = pd.DataFrame(patch_stack.std(axis=(1,2)), index=d.obs.index, columns=markers)

marker_names = np.array(markers)

# Two-sample t-test per marker between pos_leiden groups '0' and '1'.
T = ttest_ind(avgs[d.obs.pos_leiden == '0'], avgs[d.obs.pos_leiden == '1'], axis=0)
significant = T.pvalue < 0.01
# Printed explicitly: only the last expression of a cell is displayed, so as a bare
# expression this result would be computed and then silently dropped.
print('pos_leiden 0 vs 1:', marker_names[significant], T.statistic[significant])

# Same comparison for neg_leiden groups '0' and '1'; shown as the cell's output.
T = ttest_ind(avgs[d.obs.neg_leiden == '0'], avgs[d.obs.neg_leiden == '1'], axis=0)
significant = T.pvalue < 0.01
marker_names[significant], T.statistic[significant]

In [None]:
# Compare the per-patch marker averages between pos_leiden groups '0' and '1'.
tv.diff_markers(avgs, d.obs.pos_leiden == '0', d.obs.pos_leiden == '1', markers)

In [None]:
# Compare neg_leiden groups '0' and '1', first on the per-patch marker averages and then on
# the per-patch marker standard deviations.
# NOTE(review): only the last expression of a cell is displayed; if tv.diff_markers returns
# its result rather than printing/plotting it, the output of the first call is lost — confirm.
tv.diff_markers(avgs, d.obs.neg_leiden == '0', d.obs.neg_leiden == '1', markers)
tv.diff_markers(stds, d.obs.neg_leiden == '0', d.obs.neg_leiden == '1', markers)