In [1]:
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

## VISIUM smoothers

In [17]:
visium_smoothers_df = pd.read_csv('/lustre/scratch126/cellgen/team292/vl6/VISIUM/epididymis_visium_fitted_values_tradeseq_epithelium_mesenchyme.csv', 
                              index_col = 0)
print(visium_smoothers_df.shape)
visium_smoothers_df.head()

(485, 100)


Unnamed: 0,lineage1_1,lineage1_2,lineage1_3,lineage1_4,lineage1_5,lineage1_6,lineage1_7,lineage1_8,lineage1_9,lineage1_10,...,lineage1_91,lineage1_92,lineage1_93,lineage1_94,lineage1_95,lineage1_96,lineage1_97,lineage1_98,lineage1_99,lineage1_100
KRT17,0.2266,0.263181,0.305682,0.355085,0.412537,0.479385,0.557212,0.647879,0.753576,0.876887,...,45.402874,45.485892,45.619502,45.797813,46.014985,46.265163,46.542439,46.840805,47.154122,47.4761
DEPRECATED_ENSG00000128422,0.785621,0.839336,0.897033,0.959358,1.027075,1.101089,1.182474,1.272505,1.372701,1.484876,...,45.318339,45.600295,45.944157,46.343707,46.792687,47.284717,47.813232,48.371412,48.952139,49.547951
PDK4,32.189716,34.505829,36.96322,39.54132,42.212257,44.9401,47.680366,50.37989,52.977194,55.403449,...,0.943623,0.965212,0.987876,1.011594,1.03634,1.062082,1.088783,1.116403,1.144892,1.174195
MUC6,9.446734,9.816909,10.203528,10.609406,11.03772,11.492059,11.976476,12.495553,13.054484,13.659169,...,270.077385,273.930744,277.960975,282.158771,286.5142,291.016628,295.654643,300.415985,305.28748,310.25498
EPHA7,20.268748,21.745693,23.315781,24.96821,26.687998,28.455489,30.246004,32.029649,33.771394,35.431438,...,0.770521,0.802294,0.836378,0.872825,0.911678,0.952974,0.996737,1.042978,1.091691,1.14285


## scRNA-seq anndata object for bona fide secretory epithelial genes

In [3]:
import scanpy as sc
import anndata

In [4]:
scrnaseq = sc.read('/nfs/team292/vl6/FetalReproductiveTract/males_post10pcw.202412.h5ad')
scrnaseq.obs['celltype'].value_counts()

Epididymis Mese          45915
Vas Deferens Lig         15697
Epididymis Lig            6618
Perivascular              5650
Vas Deferens Mese         4301
Prostate SmMu             4021
Endothelial               3566
Glans                     3455
Corpus Cavernosum         3365
Upper Prostate Mese       3333
Schwann                   2896
Corpus Spongiosum         2705
Pre-Perivascular          2666
Lower Prostate Mese       2651
Epididymis Epi            2540
Prostate Epi              2403
Urethral Epi              2219
Prostate Lig              1777
Prepuce                   1558
Vas Deferens Epi          1316
Neural                    1126
Skeletal Muscle           1121
Immune                    1074
Preputial Lamina          1026
Coelomic Epi               630
Surface Genital Epi        564
Müllerian Epi              408
Endothelial Lymphatic      377
Ciliated Epi               361
Erythroid                   98
Name: celltype, dtype: int64

In [5]:
# Select cell types that might be contaminating the Visium signal 
scrnaseq = scrnaseq[[i in ['Epididymis Mese', 'Vas Deferens Lig', 'Epididymis Lig', 'Vas Deferens Mese',
                          'Endothelial Lymphatic', 'Erythroid', 'Neural', 'Immune', 'Coelomic Epi', 
                          'Epididymis Epi', 'Vas Deferens Epi', 'Pre-Perivascular', 'Schwann', 
                           'Endothelial', 'Perivascular', 
                          'Ciliated Epi', 'SkeletalMuscle'] for i in scrnaseq.obs['celltype']]]
scrnaseq.shape
scrnaseq.obs['celltype'].value_counts()

Epididymis Mese          45915
Vas Deferens Lig         15697
Epididymis Lig            6618
Perivascular              5650
Vas Deferens Mese         4301
Endothelial               3566
Schwann                   2896
Pre-Perivascular          2666
Epididymis Epi            2540
Vas Deferens Epi          1316
Neural                    1126
Immune                    1074
Coelomic Epi               630
Endothelial Lymphatic      377
Ciliated Epi               361
Erythroid                   98
Name: celltype, dtype: int64

## Compare expression of genes across cell types

In [18]:
genes = visium_smoothers_df.index.to_list()
len(genes)

485

In [19]:
genes = [i for i in genes if i in scrnaseq.var_names.to_list()]

In [20]:
len(genes)

482

In [21]:
cell_types_of_interest = ['Epididymis Epi', 'Vas Deferens Epi']

In [22]:
# Calculate average expression per cell type
average_expression = scrnaseq.to_df().groupby(scrnaseq.obs['celltype']).mean()

# Filter the average expression table to include only the genes of interest
average_expression = average_expression.loc[:, genes]

# Create a table to summarize the results
summary_table = average_expression.reset_index()

In [23]:
summary_table

Unnamed: 0,celltype,KRT17,PDK4,MUC6,EPHA7,TPPP3,CLDN2,CFAP157,C20orf85,CXCL14,...,ADM2,TGM2,GON7,MARCH10,TMEM158,DPT,FOXO4,CRYM,C21orf58,MOB3B
0,Coelomic Epi,0.147034,0.057891,0.006178,0.117269,0.020746,0.009716,0.000543,0.005789,0.042507,...,0.007043,0.24989,0.086477,0.003706,0.450785,0.011955,0.047765,0.036744,0.044149,0.093714
1,Epididymis Epi,0.228113,1.39664,0.176315,1.115521,0.053037,1.767909,0.064041,0.043902,0.681047,...,0.120402,0.408592,0.179717,0.018321,0.013727,0.002221,0.034432,0.125342,0.099924,0.147541
2,Ciliated Epi,0.100354,1.033658,0.118423,0.363273,2.182509,0.500582,1.715632,2.985642,0.285522,...,0.036898,0.125912,1.029949,0.407286,0.031498,0.020587,0.05619,0.03672,1.148694,0.052784
3,Epididymis Mese,0.012609,0.047115,0.001006,0.027361,0.042909,0.000736,0.000916,0.00059,0.012898,...,0.00128,0.375449,0.074546,0.000478,0.072352,0.108914,0.121992,0.014143,0.077979,0.096661
4,Epididymis Lig,0.012124,0.095423,0.001987,0.044963,0.065631,0.0012,0.000993,0.000314,0.0253,...,0.001438,0.160209,0.062209,0.000823,0.168581,0.07644,0.049632,0.004399,0.01398,0.152723
5,Vas Deferens Epi,1.956845,0.144796,0.743401,0.02936,0.016322,0.187878,0.023129,0.007599,1.823857,...,0.022373,0.181165,0.143563,0.01755,0.063548,0.007024,0.039005,0.015499,0.066878,0.306517
6,Vas Deferens Mese,0.049325,0.055407,0.002262,0.052243,0.031614,0.001365,0.001134,0.000238,0.008056,...,0.005807,0.346462,0.08186,0.006966,0.373272,0.017688,0.075615,0.015797,0.031429,0.126843
7,Vas Deferens Lig,0.006241,0.052747,0.00142,0.101264,0.429711,0.001653,0.001399,0.000519,0.0163,...,0.001372,0.081941,0.07582,0.00074,0.413296,0.168335,0.022243,0.011271,0.056342,0.090672
8,Pre-Perivascular,0.016842,0.134465,0.001306,0.040001,0.072166,0.001203,0.000579,0.000625,0.084391,...,0.001158,0.092957,0.071546,0.0,0.157024,0.12646,0.022224,0.030362,0.031859,0.145233
9,Perivascular,0.058644,0.127106,0.000977,0.048041,0.274397,0.00198,0.00084,0.000503,0.156897,...,0.002212,0.122328,0.068912,0.000712,0.079846,0.042084,0.022785,0.007429,0.066858,0.178287


In [24]:
# Filtering step 1
filtered_genes = []
for gene in genes:
    # Sort the average expression of the gene across all cell types in descending order
    sorted_expression = average_expression[gene].sort_values(ascending=False)
    
    # Check if either cell type of interest is within the top 2 cell types
    if any(cell_type in sorted_expression.index[:2] for cell_type in cell_types_of_interest):
        filtered_genes.append(gene)

# Filter the summary table to retain only the filtered genes
filtered_summary_table = summary_table[['celltype'] + filtered_genes]

In [25]:
filtered_summary_table

Unnamed: 0,celltype,KRT17,PDK4,MUC6,EPHA7,CLDN2,CFAP157,C20orf85,CXCL14,CDC20B,...,LRRC36,CRYL1,CCDC173,RSPH10B2,SCO2,ADM2,GON7,MARCH10,CRYM,MOB3B
0,Coelomic Epi,0.147034,0.057891,0.006178,0.117269,0.009716,0.000543,0.005789,0.042507,0.004773,...,0.013727,0.305155,0.077505,0.0,0.08263,0.007043,0.086477,0.003706,0.036744,0.093714
1,Epididymis Epi,0.228113,1.39664,0.176315,1.115521,1.767909,0.064041,0.043902,0.681047,0.087529,...,0.116968,0.70939,0.129018,0.007382,0.170028,0.120402,0.179717,0.018321,0.125342,0.147541
2,Ciliated Epi,0.100354,1.033658,0.118423,0.363273,0.500582,1.715632,2.985642,0.285522,0.505414,...,0.234508,0.465986,1.086073,0.251433,0.287049,0.036898,1.029949,0.407286,0.03672,0.052784
3,Epididymis Mese,0.012609,0.047115,0.001006,0.027361,0.000736,0.000916,0.00059,0.012898,0.001649,...,0.001984,0.158478,0.032296,9.4e-05,0.06097,0.00128,0.074546,0.000478,0.014143,0.096661
4,Epididymis Lig,0.012124,0.095423,0.001987,0.044963,0.0012,0.000993,0.000314,0.0253,0.002044,...,0.000671,0.148551,0.030976,0.000119,0.075284,0.001438,0.062209,0.000823,0.004399,0.152723
5,Vas Deferens Epi,1.956845,0.144796,0.743401,0.02936,0.187878,0.023129,0.007599,1.823857,0.006658,...,0.023626,0.323015,0.096498,0.000179,0.096518,0.022373,0.143563,0.01755,0.015499,0.306517
6,Vas Deferens Mese,0.049325,0.055407,0.002262,0.052243,0.001365,0.001134,0.000238,0.008056,0.003423,...,0.001723,0.186268,0.065822,0.0,0.062407,0.005807,0.08186,0.006966,0.015797,0.126843
7,Vas Deferens Lig,0.006241,0.052747,0.00142,0.101264,0.001653,0.001399,0.000519,0.0163,0.004177,...,0.004463,0.286847,0.030366,0.000291,0.108726,0.001372,0.07582,0.00074,0.011271,0.090672
8,Pre-Perivascular,0.016842,0.134465,0.001306,0.040001,0.001203,0.000579,0.000625,0.084391,0.002862,...,0.00035,0.203997,0.039523,0.000241,0.075006,0.001158,0.071546,0.0,0.030362,0.145233
9,Perivascular,0.058644,0.127106,0.000977,0.048041,0.00198,0.00084,0.000503,0.156897,0.00159,...,0.000908,0.103649,0.037708,0.0,0.087395,0.002212,0.068912,0.000712,0.007429,0.178287


In [26]:
# Filtering step 2 to remove ciliated genes
filtered_genes_step2 = []
for gene in filtered_genes:
    expression_ciliated = average_expression.loc['Ciliated Epi', gene]
    
    # Check if the expression in secretory epithelium is greater than in ciliated epithelium
    if any(average_expression.loc[cell_type, gene] > expression_ciliated for cell_type in cell_types_of_interest):
        filtered_genes_step2.append(gene)

# Filter the summary table to retain only the filtered genes from the second step
filtered_summary_table_step2 = filtered_summary_table[['celltype'] + filtered_genes_step2]



In [27]:
filtered_summary_table_step2

Unnamed: 0,celltype,KRT17,PDK4,MUC6,EPHA7,CLDN2,CXCL14,SLC3A1,GATA3,POMC,...,ADRA2C,INSIG2,OSBPL10,PAQR4,CREB5,SLC30A2,CRYL1,ADM2,CRYM,MOB3B
0,Coelomic Epi,0.147034,0.057891,0.006178,0.117269,0.009716,0.042507,0.014786,0.004004,0.08537,...,0.005344,0.121666,0.147235,0.020875,0.362212,0.0,0.305155,0.007043,0.036744,0.093714
1,Epididymis Epi,0.228113,1.39664,0.176315,1.115521,1.767909,0.681047,0.839713,0.028862,1.230517,...,0.00542,0.163214,0.190416,0.548511,0.655844,0.059564,0.70939,0.120402,0.125342,0.147541
2,Ciliated Epi,0.100354,1.033658,0.118423,0.363273,0.500582,0.285522,0.256506,0.009475,0.277299,...,0.005639,0.134627,0.064492,0.120828,0.238484,0.012597,0.465986,0.036898,0.03672,0.052784
3,Epididymis Mese,0.012609,0.047115,0.001006,0.027361,0.000736,0.012898,0.016316,0.023365,0.054417,...,0.004178,0.078912,0.023759,0.032751,0.263928,0.000197,0.158478,0.00128,0.014143,0.096661
4,Epididymis Lig,0.012124,0.095423,0.001987,0.044963,0.0012,0.0253,0.015373,0.004784,0.058324,...,0.014149,0.114059,0.050857,0.008429,0.409609,0.0,0.148551,0.001438,0.004399,0.152723
5,Vas Deferens Epi,1.956845,0.144796,0.743401,0.02936,0.187878,1.823857,0.056738,1.169439,0.212823,...,0.062034,0.310138,0.043508,0.064105,0.689803,0.003657,0.323015,0.022373,0.015499,0.306517
6,Vas Deferens Mese,0.049325,0.055407,0.002262,0.052243,0.001365,0.008056,0.021916,0.017079,0.08289,...,0.008471,0.10569,0.031867,0.022599,0.304033,0.0,0.186268,0.005807,0.015797,0.126843
7,Vas Deferens Lig,0.006241,0.052747,0.00142,0.101264,0.001653,0.0163,0.020816,0.003007,0.057102,...,0.041537,0.147066,0.029673,0.039579,0.519809,0.00012,0.286847,0.001372,0.011271,0.090672
8,Pre-Perivascular,0.016842,0.134465,0.001306,0.040001,0.001203,0.084391,0.017253,0.009202,0.061157,...,0.006759,0.106894,0.063257,0.026192,0.327687,0.0,0.203997,0.001158,0.030362,0.145233
9,Perivascular,0.058644,0.127106,0.000977,0.048041,0.00198,0.156897,0.01257,0.028123,0.060539,...,0.012186,0.116452,0.09535,0.051564,0.320839,0.000288,0.103649,0.002212,0.007429,0.178287


### Save remaining genes and plot their trends in TradeSeq 

In [28]:
print(filtered_summary_table_step2.columns.to_list())

['celltype', 'KRT17', 'PDK4', 'MUC6', 'EPHA7', 'CLDN2', 'CXCL14', 'SLC3A1', 'GATA3', 'POMC', 'TFAP2A', 'SPOCK2', 'CPM', 'PIK3C2G', 'CPXM2', 'USH1C', 'SALL1', 'MGAM', 'WNT6', 'MAL', 'ACSL5', 'CDH6', 'TBC1D9', 'PCP4', 'VIL1', 'CA2', 'PTGDS', 'SPAG11A', 'SLC38A11', 'NTRK2', 'PLXNA4', 'DPEP1', 'NSG1', 'RASD1', 'MME', 'HNF4A', 'PDZK1IP1', 'HMOX1', 'ZNF385B', 'SYTL2', 'SPP1', 'IRX1', 'RDH10', 'ITGB6', 'MFSD2A', 'TUBB2B', 'TACSTD2', 'CLDN6', 'SLC47A1', 'BLNK', 'APCDD1', 'GRHL2', 'OTULINL', 'UGT2A3', 'MAL2', 'BHMT', 'CDH3', 'MSLN', 'CDH2', 'CPLX2', 'HGD', 'RNF128', 'S100A14', 'BVES', 'TSPAN12', 'CA12', 'WDR72', 'PRDM16', 'SMIM24', 'BICDL1', 'NAPSA', 'SLC39A5', 'ADGRG2', 'NPR3', 'SLC2A9', 'GMNC', 'FXYD2', 'SAMD5', 'KREMEN2', 'PROM2', 'LYPD1', 'PTPRZ1', 'TRPC6', 'RHEX', 'SERPINI1', 'RAP1GAP2', 'GNAO1', 'COL26A1', 'GREB1', 'GATM', 'GALNT14', 'ESR1', 'ASRGL1', 'MLXIPL', 'GABRA2', 'VEGFC', 'HNF1A', 'WNT10A', 'SSC4D', 'LHX1', 'MSRB1', 'LYPD6B', 'CLDN10', 'CRYAB', 'ANPEP', 'ABCC3', 'AMN', 'F2RL1', 'T