In [None]:
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt

## VISIUM smoothers

In [None]:
# Load the Visium table; the first CSV column becomes the row index, which is
# later used as the candidate gene list.
# NOTE(review): judging by the file name these are tradeSeq fitted values
# (smoothers) — confirm.
visium_smoothers_df = pd.read_csv(
    '/lustre/scratch126/cellgen/team292/vl6/VISIUM/malereproductiveaxis_epi_cytassist_downsampled_fitted_values_tradeseq.csv',
    index_col=0,
)
print(visium_smoothers_df.shape)
visium_smoothers_df.head()

## scRNA-seq anndata object for bona fide secretory epithelial genes

In [None]:
import scanpy as sc
import anndata

In [None]:
# Load the scRNA-seq object and show how many cells carry each `celltype` label.
scrnaseq = sc.read('/nfs/team292/vl6/FetalReproductiveTract/males_post10pcw.20240326.h5ad')
scrnaseq.obs['celltype'].value_counts()

In [None]:
# Select cell types that might be contaminating the Visium signal.
# Only cells whose `celltype` label is in this list are kept.
celltypes_to_keep = [
    'Corpus/CaputEpididymis_Mesenchyme', 'CaudaEpididymis_Mesenchyme', 'CaputEpididymis_Epithelium',
    'Endothelial_Lymphatic', 'Erythroid', 'Neural', 'Immune', 'Coelomic_Epithelium',
    'Epididymis_Ligament', 'Pre-Perivascular', 'SchwannCell',
    'Corpus/CaudaEpididymis_Epithelium', 'Endothelial', 'Perivascular',
    'CaputEpididymis_Ciliated_Epithelium', 'SkeletalMuscle',
    'VasDeferens_Ligament', 'VasDeferens_Mesenchyme',
]

# Vectorised membership test: builds the same boolean mask as looping over
# every cell in Python, without the per-cell list scan.
scrnaseq = scrnaseq[scrnaseq.obs['celltype'].isin(celltypes_to_keep)]
scrnaseq.shape

## Compare expression of genes across cell types

In [None]:
# Candidate genes are the row labels of the Visium smoothers table.
genes = visium_smoothers_df.index.tolist()
len(genes)

In [None]:
# Keep only the Visium genes that are also present in the scRNA-seq object,
# preserving their original order. The gene names are put in a set once so
# each lookup is O(1), instead of rebuilding and scanning a list per gene.
scrnaseq_gene_names = set(scrnaseq.var_names)
genes = [i for i in genes if i in scrnaseq_gene_names]

In [None]:
# Number of candidate genes left after intersecting with the scRNA-seq gene names.
len(genes)

In [None]:
# Cell-type labels that both gene filters below test against (the filtering
# comments refer to them as the secretory epithelium).
cell_types_of_interest = ['Corpus/CaudaEpididymis_Epithelium', 'CaputEpididymis_Epithelium']

In [None]:
# Calculate average expression per cell type for the genes of interest.
# The columns are restricted to `genes` *before* grouping, so the mean is only
# computed for the genes that are kept, rather than for every gene in the
# object and then discarded. The resulting table is the same: one row per
# cell type, one column per gene, in the order of `genes`.
average_expression = (
    scrnaseq.to_df()
    .loc[:, genes]
    .groupby(scrnaseq.obs['celltype'])
    .mean()
)

# Create a table to summarize the results: same values, with the cell type
# moved from the index into a regular 'celltype' column.
summary_table = average_expression.reset_index()

In [None]:
summary_table

In [None]:
# Filtering step 1: keep a gene when at least one cell type of interest is
# among the `top_n_cell_types` cell types with the highest average expression.
# NOTE(review): an earlier version of this comment said "top 2" while the code
# sliced the first 3 entries; the code's behaviour (3) is kept here — confirm
# which cutoff is intended.
top_n_cell_types = 3

filtered_genes = []
for gene in genes:
    # Cell types ordered from highest to lowest average expression of this gene,
    # truncated to the leading `top_n_cell_types` labels.
    top_cell_types = average_expression[gene].sort_values(ascending=False).index[:top_n_cell_types]

    if any(cell_type in top_cell_types for cell_type in cell_types_of_interest):
        filtered_genes.append(gene)

# Filter the summary table to retain only the filtered genes
filtered_summary_table = summary_table[['celltype'] + filtered_genes]

In [None]:
# Display the per-cell-type averages for the genes that passed filtering step 1.
filtered_summary_table

In [None]:
# Filtering step 2 to remove ciliated genes: a gene survives only when at least
# one cell type of interest has a strictly higher average expression than the
# ciliated epithelium.
filtered_genes_step2 = []
for candidate in filtered_genes:
    ciliated_level = average_expression.loc['CaputEpididymis_Ciliated_Epithelium', candidate]
    secretory_levels = (
        average_expression.loc[cell_type, candidate]
        for cell_type in cell_types_of_interest
    )

    # Skip genes that are not higher in any cell type of interest.
    if not any(level > ciliated_level for level in secretory_levels):
        continue
    filtered_genes_step2.append(candidate)

# Restrict the step-1 summary table to the genes that also passed step 2.
filtered_summary_table_step2 = filtered_summary_table[['celltype'] + filtered_genes_step2]


In [None]:
# Display the per-cell-type averages for the genes that passed both filtering steps.
filtered_summary_table_step2

### Save remaining genes and plot their trends in tradeSeq

In [None]:
# Print the column labels of the step-2 table. Note that the first entry is
# the 'celltype' column, not a gene.
print(filtered_summary_table_step2.columns.tolist())

## Intersect prioritised genes with human TFs

In [None]:
# Load the transcription-factor annotation table.
# NOTE(review): presumably the Human Transcription Factors database extract
# (v1.01), judging by the path — confirm provenance.
tfs = pd.read_csv('/nfs/team292/vl6/FetalReproductiveTract/humanTFs/DatabaseExtract_v_1.01.csv')

In [None]:
# Keep rows flagged as TFs whose assessment is 'Known motif', then reduce the
# table to a plain list of HGNC symbols.
# NOTE: `tfs` is rebound from a DataFrame to a list here, so this cell cannot
# be re-run without first re-running the cell that loads the table.
is_tf = tfs['Is TF?'] == 'Yes'
has_known_motif = tfs['TF assessment'] == 'Known motif'
tfs = tfs.loc[is_tf & has_known_motif, 'HGNC symbol'].tolist()
len(tfs)

In [None]:
# Prioritised genes that are also known-motif TFs, in table column order.
# The TF symbols are put in a set once so each membership test is O(1) rather
# than a scan of the whole TF list. The 'celltype' column label is dropped
# here as long as it is not itself a TF symbol.
tf_symbols = set(tfs)
tfs_prioritised = [i for i in filtered_summary_table_step2.columns.to_list() if i in tf_symbols]

In [None]:
# Report how many prioritised TFs there are, then list them on the next line.
print(len(tfs_prioritised), tfs_prioritised, sep="\n")

In [None]:
# Drop every symbol whose name begins with "HOX".
tfs_prioritised = [
    symbol
    for symbol in tfs_prioritised
    if not symbol.startswith("HOX")
]

In [None]:
# Number of prioritised TFs left after removing the HOX-prefixed symbols.
len(tfs_prioritised)

In [None]:
# Final list of prioritised TFs (HOX-prefixed symbols excluded).
print(tfs_prioritised)