-
Notifications
You must be signed in to change notification settings - Fork 5
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Failure to reproduce the paper #5
Comments
All the code can be found in below import omicverse as ov
import scanpy as sc
import scvelo as scv
import TFvelo as Tfv
ov.plot_set() adata = scv.datasets.pancreas()
adata
if "spliced" in adata.layers:
adata.layers["total"] = adata.layers["spliced"].todense() + adata.layers["unspliced"].todense()
elif "new" in adata.layers:
adata.layers["total"] = np.array(adata.layers["total"].todense())
else:
adata.layers["total"] = adata.X
adata.layers['count']=adata.X.copy()
adata.layers["total_raw"] = adata.layers["total"].copy()
n_cells, n_genes = adata.X.shape
sc.pp.filter_genes(adata, min_cells=int(n_cells/50))
sc.pp.filter_cells(adata, min_genes=int(n_genes/50))
Tfv.pp.filter_and_normalize(adata, min_shared_counts=20,
n_top_genes=2000, log=True,subset_highly_variable=False) #include the following steps
adata.X = adata.layers["total"].copy()
adata.raw=adata.copy()
adata=adata[:,adata.var['highly_variable']==True]
adata
gene_names = []
for tmp in adata.var_names:
gene_names.append(tmp.upper())
adata.var_names = gene_names
adata.var_names_make_unique()
adata.obs_names_make_unique()
Tfv.pp.moments(adata, n_pcs=30, n_neighbors=15)
Tfv.pp.get_TFs(adata, databases='ENCODE ChEA')
import numpy as np
adata.uns['genes_pp'] = np.array(adata.var_names) import os
n_jobs_max = np.max([int(os.cpu_count()/2), 1])
n_jobs=8
if n_jobs >= 1:
n_jobs = np.min([n_jobs, n_jobs_max])
else:
n_jobs = n_jobs_max
print('n_jobs:', n_jobs)
flag = Tfv.tl.recover_dynamics(adata, n_jobs=n_jobs, max_iter=20, var_names='all',
WX_method = 'lsq_linear', WX_thres=20, max_n_TF=99, n_top_genes=2000,
fit_scaling=True, use_raw=0, init_weight_method='correlation',
n_time_points=1000) adata.write('data/tfvelo_rc.h5ad',compression='gzip') def get_pseudotime(adata):
Tfv.tl.velocity_graph(adata, basis=None, vkey='velocity', xkey='M_total')
Tfv.tl.velocity_pseudotime(adata, vkey='velocity', modality='M_total')
Tfv.pl.scatter(adata, basis='X_umap', color='velocity_pseudotime', cmap='gnuplot', fontsize=12, save='pseudotime')
return adata def check_data_type(adata):
for key in list(adata.var):
if adata.var[key][0] in ['True', 'False']:
print('Checking', key)
adata.var[key] = adata.var[key].map({'True': True, 'False': False})
return adata.var_names_make_unique()
check_data_type(adata)
print(adata)
losses = adata.varm['loss'].copy()
losses[np.isnan(losses)] = 1e6
adata.var['min_loss'] = losses.min(1)
n_cells = adata.shape[0]
expanded_scaling_y = np.expand_dims(np.array(adata.var['fit_scaling_y']),0).repeat(n_cells,axis=0)
adata.layers['velocity'] = adata.layers['velo_hat'] / expanded_scaling_y
if 'X_pca' not in adata.obsm.keys():
print('PCA ing')
sc.tl.pca(adata, n_comps=50, svd_solver='arpack')
scv.pp.moments(adata, n_pcs=5, n_neighbors=30)
adata = get_pseudotime(adata)
def data_type_tostr(adata, key=None):
if key is None:
for key in list(adata.var):
if adata.var[key][0] in [True, False]:
print('Transfering', key)
adata.var[key] = adata.var[key].map({True: 'True', False:'False'})
elif key in adata.var.keys():
if adata.var[key][0] in [True, False]:
print('Transfering', key)
adata.var[key] = adata.var[key].map({True: 'True', False:'False'})
return def get_sort_positions(arr):
positions = np.argsort(np.argsort(arr))
positions_normed = positions/(len(arr)-1)
return positions_normed
def get_metric_pseudotime(adata, t_key='latent_t'):
n_cells, n_genes = adata.shape
adata.var['spearmanr_pseudotime'] = 0.0
for i in range(n_genes):
correlation, _ = scipy.stats.spearmanr(adata.layers[t_key][:,i], adata.obs['velocity_pseudotime'])
adata.var['spearmanr_pseudotime'][i] = correlation
return adata def get_sort_t(adata):
t = adata.layers['fit_t_raw'].copy()
normed_t = adata.layers['fit_t_raw'].copy()
n_bins = 20
n_cells, n_genes = adata.shape
sort_t = np.zeros([n_cells, n_genes])
non_blank_gene = np.zeros(n_genes, dtype=int)
hist_all, bins_all = np.zeros([n_genes, n_bins]), np.zeros([n_genes, n_bins+1])
for i in range(n_genes):
gene_name = adata.var_names[i]
tmp = t[:,i].copy()
if np.isnan(tmp).sum():
non_blank_gene[i] = 1
continue
hist, bins = np.histogram(tmp, bins=n_bins)
hist_all[i], bins_all[i] = hist, bins
if not (0 in list(hist)):
if (tmp.min() < 0.1) and (tmp.max() > 0.8):
blank_start_bin_id = np.argmin(hist)
blank_end_bin_id = blank_start_bin_id
non_blank_gene[i] = 1
blank_start_bin = bins[blank_start_bin_id]
blank_end_bin = bins[blank_end_bin_id]
tmp = (tmp < blank_start_bin)*1 + tmp
else:
blank_end_bin = tmp.min()
else:
blank_start_bin_id = list(hist).index(0)
for j in range(blank_start_bin_id+1, len(hist)):
if hist[j] > 0:
blank_end_bin_id = j
break
blank_start_bin = bins[blank_start_bin_id]
blank_end_bin = bins[blank_end_bin_id]
tmp = (tmp < blank_start_bin)*1 + tmp
t[:,i] = tmp
tmp = tmp - blank_end_bin
tmp = tmp/tmp.max()
normed_t[:,i] = tmp
sort_t[:,i] = get_sort_positions(tmp)
adata.layers['latent_t'] = sort_t.copy()
adata.var['non_blank_gene'] = non_blank_gene.copy()
return adata adata_copy.var['non_blank_gene'].fillna(0)
import anndata as ad
import scanpy as sc
import numpy as np
import scipy
import matplotlib adata_copy = adata.copy()
adata_copy = get_sort_t(adata_copy)
adata_copy_1 = adata_copy.copy()
data_type_tostr(adata_copy_1)
adata_copy_1.var['non_blank_gene']=adata_copy_1.var['non_blank_gene'].fillna(0)
print(adata_copy_1)
adata_copy_1.write('data/tfvelo_rc1.h5ad')
thres_loss = np.percentile(adata_copy.var['min_loss'], 50)
adata_copy = adata_copy[:, adata_copy.var['min_loss'] < thres_loss]
thres_n_cells = adata_copy.X.shape[0] * 0.1
adata_copy = adata_copy[:, adata_copy.var['n_cells'] > thres_n_cells]
adata_copy = adata_copy[:, adata_copy.var['non_blank_gene']==0]
adata_copy = get_metric_pseudotime(adata_copy)
adata_copy = adata_copy[:, adata_copy.var['spearmanr_pseudotime'] > 0.8]
def show_adata(adata, save_name, show_all=0):
if show_all:
for i in range(int((len(adata.var_names)-1)/20)+1):
genes2show = adata.var_names[i*20: (i+1)*20]
Tfv.pl.velocity(adata, genes2show, ncols=4, add_outline=True, layers='na', dpi=300, fontsize=15, save='WX_y_'+save_name+'_'+str(i)) #layers='all'
if len(adata.obs['clusters'].cat.categories) > 10:
legend_loc = 'right margin'
else:
legend_loc = 'on data'
cutoff_perc = 20
Tfv.pl.velocity_embedding_stream(adata, vkey='velocity', use_derivative=False, density=2, basis='X_umap', \
cutoff_perc=cutoff_perc, smooth=0.5, fontsize=20, recompute=True, \
legend_loc=legend_loc, save='embedding_stream_'+save_name) #
return Tfv.tl.velocity_graph(adata_copy, basis=None, vkey='velocity', xkey='M_total')
adata_copy.uns['clusters_colors'] = adata.uns['clusters_colors']
show_adata(adata_copy, save_name='velo', show_all=1)
data_type_tostr(adata_copy)
print(adata_copy)
adata_copy.var['non_blank_gene']=adata_copy.var['non_blank_gene'].fillna(0)
adata_copy.write('data/tfvelo_TFvelo.h5ad')
|
Dear Starlitnightly, I hope this message finds you well. I am writing to inquire about the installation process for the TFvelo code. I have been trying to import the TFvelo module using "import TFvelo as TFv" in my Python script, but it appears that the TFvelo module is not available for installation through standard channels like pip. I have searched for the module on PyPI and other package repositories, but couldn't find a matching distribution. Could you please provide guidance on how to obtain and install the TFvelo module? I have already obtained the TFvelo code from the repository [repository URL] and downloaded it to my local machine. However, I am unsure of the correct installation procedure. Could you kindly provide instructions on how to install TFvelo or any specific dependencies that need to be satisfied beforehand? Thank you very much for your time and attention. I greatly appreciate any assistance you can provide in helping me install TFvelo and utilize its functionalities effectively. Looking forward to your kind response. Best regards,hyg |
Hi Zehua, We have provided a jupyter tutorial at https://github.com/xiaoyeye/TFvelo/blob/main/TFvelo_demo.ipynb for reproducing the results on pancreas dataset in the paper. |
Thanks for the reply, but you don't seem to have provided the result_path = 'TFvelo_pancreas_demo2/'
n_jobs = 28
adata = ad.read_h5ad(result_path + 'pp.h5ad')
TFv.tl.recover_dynamics(adata, n_jobs=n_jobs, max_iter=20, var_names='all',
WX_method='lsq_linear', WX_thres=20, max_n_TF=99, n_top_genes=2000,
fit_scaling=True, use_raw=0, init_weight_method="correlation",
n_time_points=1000)
adata.write(result_path + 'rc.h5ad') |
Hi,
I am using the pancera dataset with all parameters defaulted, why am I unable to reproduce the results of the paper? The code used is all functions within
TFvelo_run_demo.py
andTFvelo_analysis_demo.py.py
.Can you provide a jupyter tutorial to make it easy for us to see the intermediate analysis results?
SIncerely,
Zehua
The text was updated successfully, but these errors were encountered: