In [1]:
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

## VISIUM smoothers

In [2]:
# Load the fitted values for the Visium fallopian-axis epithelium (tradeSeq output, per the
# file name). The first CSV column is used as the row index (gene symbols in the preview below).
visium_smoothers_path = '/lustre/scratch126/cellgen/team292/vl6/VISIUM/fallopian_axis_visium_fitted_values_tradeseq_epithelium.csv'
visium_smoothers_df = pd.read_csv(visium_smoothers_path, index_col=0)
print(visium_smoothers_df.shape)
visium_smoothers_df.head()

(464, 100)


Unnamed: 0,lineage1_1,lineage1_2,lineage1_3,lineage1_4,lineage1_5,lineage1_6,lineage1_7,lineage1_8,lineage1_9,lineage1_10,...,lineage1_91,lineage1_92,lineage1_93,lineage1_94,lineage1_95,lineage1_96,lineage1_97,lineage1_98,lineage1_99,lineage1_100
PNOC,90.553134,89.686782,88.828347,87.97739,87.133491,86.29624,85.465245,84.640123,83.82051,83.006051,...,2.13693,1.981397,1.837127,1.703314,1.579211,1.464121,1.357397,1.258437,1.166683,1.081614
RNASE1,18.715254,19.215412,19.726853,20.247627,20.775563,21.308256,21.843065,22.377107,22.907256,23.430151,...,4.744855,4.714616,4.687151,4.662095,4.63909,4.617788,4.597852,4.578948,4.56075,4.542937
CLDN6,29.34354,28.486165,27.653841,26.845836,26.061439,25.29996,24.560731,23.843099,23.146435,22.470126,...,2.034765,1.975327,1.917625,1.861609,1.807229,1.754438,1.703189,1.653437,1.605138,1.55825
RASSF10,20.674208,21.557139,22.47393,23.421695,24.396901,25.395325,26.412001,27.441192,28.476359,29.510151,...,5.159761,4.992507,4.83088,4.674659,4.523634,4.377604,4.236378,4.099774,3.967617,3.83974
RSPO1,11.607547,11.84239,12.080693,12.321156,12.562377,12.802845,13.040944,13.274954,13.503056,13.723336,...,1.433482,1.367537,1.304527,1.244338,1.186859,1.131982,1.079602,1.029616,0.981927,0.936437


## scRNA-seq anndata object for bona fide secretory epithelial genes

In [3]:
import scanpy as sc
import anndata

In [20]:
# Read the scRNA-seq object from disk and tally the number of cells per annotated cell type
# (the `celltype` column of `.obs`).
scrnaseq = sc.read('/nfs/team292/vl6/FetalReproductiveTract/post_10pcw_females.202412.h5ad')
scrnaseq.obs['celltype'].value_counts()

Fallopian Mese              27472
Uterus Mese                 26605
Fallopian Lig               15894
Epoophron Mese              12429
Perivascular                12195
Cervix/Upper Vagina SmMu    11129
Uterus Lig                  10700
Cervix Mese                  8248
Fallopian Epi                7764
Cervix/Vagina Lig            7728
Uterus/Cervix Epi            7394
Endothelial                  7331
Upper Vagina Mese            7192
Uterus SmMu                  7079
Lower Vagina Epi             6619
Schwann                      6464
Fallopian SmMu               5507
Pre-Perivascular             4445
Corpus Cavernosum            4218
Glans                        3695
Lower Vagina Mese            3300
Bladder Mese                 3079
Urethral Epi                 2414
Preputial Lamina Epi         2384
Corpus Spongiosum            2223
Coelomic Epi                 2106
Prepuce                      2090
Labia                        1792
Immune                       1788
Neural        

In [21]:
# Select cell types that might be contaminating the Visium signal.
# NOTE: this rebinds `scrnaseq` to the subset; re-read the file to get the full object back.
contaminating_celltypes = [
    'Fallopian Mese', 'Ciliated Epi', 'Mesonephric Tubules Epi',
    'Endothelial Lymphatic', 'Erythroid', 'Neural', 'Immune', 'Coelomic Epi',
    'Fallopian SmMu', 'Pre-Perivascular', 'Schwann',
    'Fallopian Epi', 'Endothelial', 'Perivascular',
    'Epoophron Mese', 'Fallopian Lig',
]
# Vectorised membership test: one boolean per cell, True when its cell type is in the list.
keep_cells = scrnaseq.obs['celltype'].isin(contaminating_celltypes)
scrnaseq = scrnaseq[keep_cells]
scrnaseq.shape

(106123, 36601)

In [22]:
# Cells per cell type in the current `scrnaseq` object (sanity check after the subsetting above).
scrnaseq.obs['celltype'].value_counts()

Fallopian Mese             27472
Fallopian Lig              15894
Epoophron Mese             12429
Perivascular               12195
Fallopian Epi               7764
Endothelial                 7331
Schwann                     6464
Fallopian SmMu              5507
Pre-Perivascular            4445
Coelomic Epi                2106
Immune                      1788
Neural                      1460
Endothelial Lymphatic        542
Mesonephric Tubules Epi      381
Erythroid                    227
Ciliated Epi                 118
Name: celltype, dtype: int64

## Compare expression of genes across cell types

In [23]:
# Candidate genes: every row label of the Visium smoothers table.
genes = visium_smoothers_df.index.to_list()
len(genes)

464

In [24]:
# Keep only the candidate genes that are also present in the scRNA-seq object, preserving
# their original order. The lookup set is built once instead of re-creating (and linearly
# scanning) the full list of var names for every gene.
scrnaseq_gene_names = set(scrnaseq.var_names)
genes = [gene for gene in genes if gene in scrnaseq_gene_names]

In [25]:
# Number of candidate genes remaining after restricting to genes found in `scrnaseq`.
len(genes)

460

In [26]:
# Cell type whose genes we want to retain; the filtering steps below compare every other
# cell type's average expression against this one.
cell_type_of_interest = 'Fallopian Epi'

In [27]:
# Calculate average expression per cell type for the genes of interest.
# Restrict the object to those genes *before* calling to_df(), so the intermediate DataFrame
# has one column per gene of interest rather than one per gene in the whole object
# (36,601 in the shape shown above). The per-gene means are unaffected by this.
is_gene_of_interest = scrnaseq.var_names.isin(genes)
expression_of_interest = scrnaseq[:, is_gene_of_interest].to_df()
average_expression = expression_of_interest.groupby(scrnaseq.obs['celltype']).mean()

# Order the columns as in `genes`
average_expression = average_expression.loc[:, genes]

# Create a table to summarize the results (cell type becomes a regular column)
summary_table = average_expression.reset_index()

In [28]:
# Display the per-cell-type average expression table (one row per cell type, one column per gene).
summary_table

Unnamed: 0,celltype,PNOC,RNASE1,CLDN6,RASSF10,RSPO1,HOXA9,MSLN,GATA6,TSPAN7,...,N4BP3,PARD6B,NECTIN3,C6orf132,MAPRE3,HMGCS1,GFAP,SIGLEC15,PTPN3,MAP6
0,Coelomic Epi,0.610377,0.069743,0.060441,0.075872,0.480134,0.072882,1.631269,1.390298,0.544202,...,0.015706,0.541537,0.184261,0.093347,0.111679,0.320981,0.003954,0.000129,0.078497,0.036905
1,Mesonephric Tubules Epi,0.101993,0.022261,0.576291,0.241683,0.008168,0.378185,0.104466,0.035187,0.153233,...,0.028001,0.551818,0.224752,0.054509,0.082307,0.352402,0.035722,0.017521,0.300182,0.022108
2,Epoophron Mese,0.004593,0.012018,0.000125,0.000208,0.040961,0.24306,0.001459,0.252617,0.138595,...,0.043429,0.002615,0.17177,0.023554,0.079039,0.2711,0.000121,0.000217,0.000654,0.168039
3,Fallopian Epi,1.624585,0.899715,0.621305,0.369926,0.20588,0.005852,1.861404,0.60466,0.633274,...,0.092847,0.651259,0.376251,0.282135,0.077035,0.588767,0.009179,0.015804,0.133731,0.020385
4,Ciliated Epi,0.016853,0.174837,0.02193,0.119791,0.010233,0.014034,0.244676,0.130964,0.063494,...,0.015708,0.604436,0.219529,0.306074,0.916133,0.266574,0.012765,0.043596,0.175905,0.590815
5,Fallopian Mese,0.007976,0.039085,0.000844,0.00093,0.02825,0.200293,0.010918,0.425786,0.14154,...,0.047773,0.020532,0.203397,0.014294,0.103982,0.362658,0.000436,0.000112,0.00351,0.179082
6,Fallopian Lig,0.010748,0.00997,0.000305,0.000431,0.077072,0.348166,0.031195,0.824053,0.248579,...,0.023117,0.031761,0.155019,0.027472,0.083449,0.222975,0.000206,5.7e-05,0.002301,0.123962
7,Fallopian SmMu,0.010288,0.005143,0.00098,0.001226,0.174705,0.672623,0.016266,0.682099,0.738644,...,0.028614,0.026682,0.137869,0.020352,0.14919,0.330694,0.000112,4.1e-05,0.007826,0.220177
8,Pre-Perivascular,0.002423,0.012086,0.000715,0.002115,0.022449,0.146787,0.000602,0.486447,0.060064,...,0.008072,0.003537,0.150725,0.009439,0.068448,0.157863,0.000609,0.0,0.003832,0.109832
9,Perivascular,0.001325,0.022993,0.001304,0.001977,0.007885,0.132638,0.003272,0.0833,0.042351,...,0.007369,0.021536,0.17467,0.003273,0.070614,0.273281,0.000184,0.000187,0.027936,0.105572


In [29]:
# First filtering step: keep a gene only if the cell type of interest is among the
# `top_n_celltypes` cell types with the highest average expression of that gene.
# (The previous header comment said "top 3", but the code has always used the top 4.)
top_n_celltypes = 4

filtered_genes = []
for gene in genes:
    # Rank cell types by average expression of this gene, highest first
    ranked_expression = average_expression[gene].sort_values(ascending=False)

    # Retain the gene when the cell type of interest is within the top-ranked cell types
    if cell_type_of_interest in ranked_expression.index[:top_n_celltypes]:
        filtered_genes.append(gene)

# Filter the summary table to retain only the filtered genes
filtered_summary_table = summary_table[['celltype'] + filtered_genes]

In [30]:
# Display the summary table restricted to the genes that passed the first filtering step.
filtered_summary_table

Unnamed: 0,celltype,PNOC,RNASE1,CLDN6,RASSF10,RSPO1,MSLN,GATA6,DOK5,PROM1,...,STMND1,BAIAP3,N4BP3,PARD6B,NECTIN3,C6orf132,HMGCS1,GFAP,SIGLEC15,PTPN3
0,Coelomic Epi,0.610377,0.069743,0.060441,0.075872,0.480134,1.631269,1.390298,0.112219,0.088938,...,0.002009,0.008009,0.015706,0.541537,0.184261,0.093347,0.320981,0.003954,0.000129,0.078497
1,Mesonephric Tubules Epi,0.101993,0.022261,0.576291,0.241683,0.008168,0.104466,0.035187,0.06653,0.246823,...,0.007024,0.107514,0.028001,0.551818,0.224752,0.054509,0.352402,0.035722,0.017521,0.300182
2,Epoophron Mese,0.004593,0.012018,0.000125,0.000208,0.040961,0.001459,0.252617,0.106396,0.00908,...,0.000342,0.000908,0.043429,0.002615,0.17177,0.023554,0.2711,0.000121,0.000217,0.000654
3,Fallopian Epi,1.624585,0.899715,0.621305,0.369926,0.20588,1.861404,0.60466,0.682405,0.093354,...,0.015084,0.034387,0.092847,0.651259,0.376251,0.282135,0.588767,0.009179,0.015804,0.133731
4,Ciliated Epi,0.016853,0.174837,0.02193,0.119791,0.010233,0.244676,0.130964,0.029755,0.458229,...,0.749671,1.245472,0.015708,0.604436,0.219529,0.306074,0.266574,0.012765,0.043596,0.175905
5,Fallopian Mese,0.007976,0.039085,0.000844,0.00093,0.02825,0.010918,0.425786,0.025965,0.005481,...,0.001249,0.000883,0.047773,0.020532,0.203397,0.014294,0.362658,0.000436,0.000112,0.00351
6,Fallopian Lig,0.010748,0.00997,0.000305,0.000431,0.077072,0.031195,0.824053,0.041694,0.023062,...,0.000626,0.001191,0.023117,0.031761,0.155019,0.027472,0.222975,0.000206,5.7e-05,0.002301
7,Fallopian SmMu,0.010288,0.005143,0.00098,0.001226,0.174705,0.016266,0.682099,0.013517,0.070255,...,0.001702,0.000347,0.028614,0.026682,0.137869,0.020352,0.330694,0.000112,4.1e-05,0.007826
8,Pre-Perivascular,0.002423,0.012086,0.000715,0.002115,0.022449,0.000602,0.486447,0.152426,0.015989,...,0.000514,0.001117,0.008072,0.003537,0.150725,0.009439,0.157863,0.000609,0.0,0.003832
9,Perivascular,0.001325,0.022993,0.001304,0.001977,0.007885,0.003272,0.0833,0.061256,0.022592,...,0.001804,0.004444,0.007369,0.021536,0.17467,0.003273,0.273281,0.000184,0.000187,0.027936


In [31]:
# Second filtering step: compare the cell type of interest against one specific comparison
# cell type and drop genes that are clearly higher in the comparison cell type.
comparison_cell_type = "Ciliated Epi"
# A gene is kept when the comparison cell type's average expression is at most this multiple
# of the expression in the cell type of interest (i.e. at most 20% higher).
# NOTE(review): the previous comment described this as "not more than 30% less", which does
# not match the 1.2 factor actually applied; the factor is left unchanged here, so confirm
# which threshold was intended.
max_comparison_ratio = 1.2

filtered_genes_step2 = []
for gene in filtered_genes:
    expression_interest = average_expression.loc[cell_type_of_interest, gene]
    expression_comparison = average_expression.loc[comparison_cell_type, gene]

    # Keep the gene if it is higher in the cell type of interest, or if the comparison
    # cell type exceeds it by no more than the allowed ratio
    higher_in_interest = expression_interest > expression_comparison
    within_tolerance = expression_comparison <= max_comparison_ratio * expression_interest
    if higher_in_interest or within_tolerance:
        filtered_genes_step2.append(gene)

# Filter the summary table to retain only the filtered genes from the second step
filtered_summary_table_step2 = filtered_summary_table[['celltype'] + filtered_genes_step2]


In [32]:
# Display the summary table restricted to the genes that passed both filtering steps.
filtered_summary_table_step2

Unnamed: 0,celltype,PNOC,RNASE1,CLDN6,RASSF10,RSPO1,MSLN,GATA6,DOK5,PPP2R2B,...,KRT7,EVPL,WDR72,SYNDIG1,LSR,N4BP3,PARD6B,NECTIN3,C6orf132,HMGCS1
0,Coelomic Epi,0.610377,0.069743,0.060441,0.075872,0.480134,1.631269,1.390298,0.112219,0.13775,...,0.695221,0.054187,0.015914,0.02056,0.603456,0.015706,0.541537,0.184261,0.093347,0.320981
1,Mesonephric Tubules Epi,0.101993,0.022261,0.576291,0.241683,0.008168,0.104466,0.035187,0.06653,0.045145,...,0.566106,0.07351,0.458149,0.149692,0.67073,0.028001,0.551818,0.224752,0.054509,0.352402
2,Epoophron Mese,0.004593,0.012018,0.000125,0.000208,0.040961,0.001459,0.252617,0.106396,0.002859,...,0.000609,0.000149,0.000543,0.001953,0.006609,0.043429,0.002615,0.17177,0.023554,0.2711
3,Fallopian Epi,1.624585,0.899715,0.621305,0.369926,0.20588,1.861404,0.60466,0.682405,0.542199,...,1.171371,0.093291,0.12647,0.058409,0.689024,0.092847,0.651259,0.376251,0.282135,0.588767
4,Ciliated Epi,0.016853,0.174837,0.02193,0.119791,0.010233,0.244676,0.130964,0.029755,0.152277,...,0.284562,0.046833,0.077905,0.02077,0.683478,0.015708,0.604436,0.219529,0.306074,0.266574
5,Fallopian Mese,0.007976,0.039085,0.000844,0.00093,0.02825,0.010918,0.425786,0.025965,0.035053,...,0.002195,0.002014,0.003632,0.001262,0.022663,0.047773,0.020532,0.203397,0.014294,0.362658
6,Fallopian Lig,0.010748,0.00997,0.000305,0.000431,0.077072,0.031195,0.824053,0.041694,0.058797,...,0.006092,0.002076,0.001219,0.005808,0.041709,0.023117,0.031761,0.155019,0.027472,0.222975
7,Fallopian SmMu,0.010288,0.005143,0.00098,0.001226,0.174705,0.016266,0.682099,0.013517,0.014646,...,0.002316,0.014007,0.002532,0.003639,0.016152,0.028614,0.026682,0.137869,0.020352,0.330694
8,Pre-Perivascular,0.002423,0.012086,0.000715,0.002115,0.022449,0.000602,0.486447,0.152426,0.003239,...,0.00168,0.000808,8.7e-05,0.026448,0.016779,0.008072,0.003537,0.150725,0.009439,0.157863
9,Perivascular,0.001325,0.022993,0.001304,0.001977,0.007885,0.003272,0.0833,0.061256,0.006494,...,0.002418,0.000865,0.000913,0.001213,0.080074,0.007369,0.021536,0.17467,0.003273,0.273281


### Save remaining genes and plot their trends in TradeSeq 

In [33]:
# Print the full column list of the final table: 'celltype' followed by every retained gene.
print(filtered_summary_table_step2.columns.to_list())

['celltype', 'PNOC', 'RNASE1', 'CLDN6', 'RASSF10', 'RSPO1', 'MSLN', 'GATA6', 'DOK5', 'PPP2R2B', 'WT1', 'AIF1L', 'ARL4C', 'C19orf33', 'ASS1', 'AFAP1L2', 'WNT10A', 'LRIG1', 'EGR1', 'UPK3B', 'CRTAC1', 'DLGAP1', 'TNFRSF12A', 'ADAM28', 'GALNT17', 'MMP28', 'ADAMTS1', 'ALDH1A2', 'CD109', 'AAK1', 'EFNB2', 'ARHGEF19', 'ERP27', 'MCL1', 'IMPG2', 'CLDN4', 'MUC6', 'NUAK2', 'S100A1', 'CCN1', 'CRB2', 'CYP26B1', 'RIMBP2', 'EPHA2', 'SNCB', 'FOSB', 'EYA4', 'PCDH17', 'ELF3', 'CXCR4', 'CMTM6', 'PALM3', 'ATF3', 'APOA1', 'SLC26A7', 'BTG2', 'BCL11A', 'CSRNP1', 'SPDEF', 'GADD45A', 'SLITRK2', 'GMNC', 'NR4A1', 'EDN2', 'CHI3L1', 'RNF212', 'GSN', 'SPOCK2', 'FRZB', 'DUSP2', 'NPR1', 'EGR2', 'BDP1', 'GREB1', 'EGR3', 'ACE', 'NELL1', 'CCN2', 'TRIB1', 'RUNX2', 'BCAT1', 'DHCR24', 'IER2', 'MXRA5', 'PLEKHG1', 'ADORA1', 'DLX5', 'CACNA2D3', 'HAPLN3', 'CCN3', 'BEND7', 'DACH2', 'RBP4', 'IQCN', 'CCNL1', 'ITPKC', 'TMEM132B', 'SPOCK3', 'ATP13A4', 'LAMA1', 'COL9A1', 'CHRNA4', 'TM7SF2', 'AJUBA', 'PSAT1', 'HOMER2', 'DCBLD2', 'MAP3K