# FCA analysis - preprocessing

In [1]:
import collections
import glob
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc


def MovePlots(plotpattern, subplotdir):
    """Move saved figures matching ``plotpattern`` into a subfolder of the figure directory.

    Parameters
    ----------
    plotpattern : str
        Glob fragment; every entry directly inside ``sc.settings.figdir`` whose
        name matches ``*<plotpattern>*`` is moved.
    subplotdir : str
        Destination folder, relative to ``sc.settings.figdir``. Created
        (including parents) when it does not exist yet.

    Notes
    -----
    Implemented with the standard library rather than ``os.system('mv ...')`` so that
    paths containing spaces or shell metacharacters are handled literally.
    Existing files of the same name in the destination are overwritten.
    A call that matches nothing is a no-op.
    """
    figdir = str(sc.settings.figdir)
    target_dir = os.path.join(figdir, subplotdir)
    os.makedirs(target_dir, exist_ok=True)
    target_real = os.path.realpath(target_dir)

    # Only the pattern is treated as a glob; the figure directory itself is escaped.
    search = os.path.join(glob.escape(figdir), '*' + plotpattern + '*')
    for src in glob.glob(search):
        src_real = os.path.realpath(src)
        # The destination (or one of its parents) can match the pattern itself;
        # never try to move a folder into itself.
        if target_real == src_real or target_real.startswith(src_real + os.sep):
            continue
        os.replace(src, os.path.join(target_dir, os.path.basename(src)))


# Session-wide scanpy configuration for this notebook.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
# Figure directory setting; MovePlots() reads the same value to sort files into subfolders.
sc.settings.figdir = './figures/preprocessing/'
# Print the versions of scanpy and its dependencies so the run is traceable.
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures

# Last expression of the cell: displays the path of the Python interpreter in use.
sys.executable

  from pandas.core.index import RangeIndex


scanpy==1.4.4 anndata==0.6.22 umap==0.5.1 numpy==1.20.0 scipy==1.6.2 pandas==1.2.1 scikit-learn==0.24.1 statsmodels==0.12.2


'/home/jovyan/my-conda-envs/anndata06/bin/python'

## Import data
Import sample metadata

In [2]:
# Root folder of the FCA scRNA-seq inputs; the trailing slash is required because
# downstream paths are built by plain string concatenation.
data_dir = '/nfs/users/nfs_l/lg18/team292/lg18/gonads/data/scRNAseq/FCA/'

# Sample sheet: one row per sample, indexed by the first CSV column.
meta = pd.read_csv('{}rawdata/meta_noimmune_v2.csv'.format(data_dir), index_col=0)
# Keep the developmental stage as text.
meta['stage'] = meta['stage'].astype(str)

# Metadata fields available for plotting: every sample-sheet column plus 'sample'.
plotmeta = [*meta.columns, 'sample']

print('Number of samples: ', len(meta.index))

Number of samples:  53


In [3]:
# Display the sample sheet for a visual sanity check.
meta

Unnamed: 0_level_0,individual,location,stage,sex,study,batch_collection,process,cryopreserved,TP,5v1.1
sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
FCA_GND8047885,F81,Gonad,8.8,female,FCA,A,CD45-,No,U,5v1.1
FCA_GND8125923,F86,Gonad,7.5,female,FCA,B,CD45-,No,U,5v1.1
FCA_GND8125925,F87,Gonad,11.0,female,FCA,B,CD45-,No,U,5v1.1
FCA_GND8103050,F83,Gonad,17.0,female,FCA,C,CD45-,No,U,5v1.1
FCA_GND8103053,F84,Gonad,9.0,female,FCA,Cpoor,CD45-,No,U,5v1.1
FCA_GND8622625,F89,Gonad,8.4,female,FCA,E,total,No,U,5v1.1
FCA_GND8622626,F89,Gonad,8.4,female,FCA,E,total,No,U,5v1.1
FCA_GND8622628,F91,Gonad,11.0,female,FCA,E,CD45-,No,U,5v1.1
FCA_GND8715408,F96,Gonad,14.0,female,FCA,F,CD45-,No,Med,5v1.1
FCA_GND8289580,F88,Gonad,11.0,male,FCA,D,CD45-,No,U,5v1.1


## Preprocessing
### Load 10x 

Filter: 1) cells (< 150 genes); 2) genes (< 3 cells)

Quantify: 1) % mitochondrial genes; 2) total counts


In [4]:
# Load every sample listed in the sample sheet, apply a light per-sample filter,
# attach the sample-level metadata and compute per-cell QC metrics.
# `holder` ends up with one AnnData per sample, in sample-sheet order.
holder = []
for sample in meta.index:
    print(sample)
    # Load 10x data as AnnData
    holder.append(sc.read_10x_mtx(data_dir+'rawdata/'+sample+'/filtered_feature_bc_matrix/',cache=True))
    adata_sample = holder[-1]
    # Set names of observations as sample + _ + barcode (text after the first '-' is dropped)
    adata_sample.obs_names = [sample+'_'+barcode.split('-')[0] for barcode in adata_sample.obs_names]
    # Filter genes expressed in less than 3 cells
    sc.pp.filter_genes(adata_sample, min_cells=3)
    # Filter cells with less than 150 genes expressed
    sc.pp.filter_cells(adata_sample, min_genes=150)
    # Add in metadata: every cell inherits the sample-level values
    adata_sample.obs['sample'] = sample
    for val in meta.columns:
        adata_sample.obs[val] = meta.loc[sample, val]
    # Extract mitochondrial genes
    mito_genes = [name for name in adata_sample.var_names if name.startswith('MT-')]
    # Total counts per cell, computed once and reused for both QC columns.
    # The `.A1` is only necessary as X is sparse - it flattens the summed matrix to a 1-D array
    total_counts = adata_sample.X.sum(axis=1).A1
    mito_counts = adata_sample[:, mito_genes].X.sum(axis=1).A1
    # For each cell: fraction (0-1) of counts in mito genes vs. all genes
    adata_sample.obs['percent_mito'] = mito_counts / total_counts
    # Add the total counts per cell as observations-annotation
    adata_sample.obs['n_counts'] = total_counts
    print('Total number of cells: {:d}'.format(adata_sample.n_obs))
    print('Total number of genes: {:d}'.format(adata_sample.n_vars))

FCA_GND8047885
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8047885-filtered_feature_bc_matrix-matrix.h5ad


  if is_categorical(df_full[k]):


filtered out 13510 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 15 cells that haveless than 150 genes expressed
Total number of cells: 9227
Total number of genes: 20028
FCA_GND8125923
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8125923-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12810 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 66 cells that haveless than 150 genes expressed
Total number of cells: 4147
Total number of genes: 20728
FCA_GND8125925
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8125925-filtered_feature_bc_matrix-matrix.h5ad
filtered out 14764 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 136 cells that haveless than 150 genes expressed
Total number of cells: 3093
Total number of genes: 18774
FCA_GND8103050
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8103050-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12350 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 18 cells that haveless than 150 genes expressed
Total number of cells: 11516
Total number of genes: 21188
FCA_GND8103053
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8103053-filtered_feature_bc_matrix-matrix.h5ad
filtered out 15821 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 9 cells that haveless than 150 genes expressed
Total number of cells: 3472
Total number of genes: 17717
FCA_GND8622625
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8622625-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16461 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 35 cells that haveless than 150 genes expressed
Total number of cells: 1157
Total number of genes: 17077
FCA_GND8622626
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8622626-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16577 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 25 cells that haveless than 150 genes expressed
Total number of cells: 1056
Total number of genes: 16961
FCA_GND8622628
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8622628-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12668 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 126 cells that haveless than 150 genes expressed
Total number of cells: 5227
Total number of genes: 20870
FCA_GND8715408
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8715408-filtered_feature_bc_matrix-matrix.h5ad
filtered out 15403 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 489 cells that haveless than 150 genes expressed
Total number of cells: 2890
Total number of genes: 18135
FCA_GND8289580
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8289580-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13595 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 80 cells that haveless than 150 genes expressed
Total number of cells: 10833
Total number of genes: 19943
FCA_GND8622630
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8622630-filtered_feature_bc_matrix-matrix.h5ad
filtered out 21362 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


Total number of cells: 100
Total number of genes: 12176
FCA_GND8715519
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8715519-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11260 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 15 cells that haveless than 150 genes expressed
Total number of cells: 5825
Total number of genes: 22278
FCA_GND8784460
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8784460-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12539 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 41 cells that haveless than 150 genes expressed
Total number of cells: 6965
Total number of genes: 20999
FCA_GND8784458
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8784458-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12483 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


Total number of cells: 8703
Total number of genes: 21055
FCA_GND8810844
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810844-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13828 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 31 cells that haveless than 150 genes expressed
Total number of cells: 3024
Total number of genes: 19710
FCA_GND8810845
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810845-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16301 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 3 cells that haveless than 150 genes expressed
Total number of cells: 1761
Total number of genes: 17237
FCA_GND8810848
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810848-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13848 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 17 cells that haveless than 150 genes expressed
Total number of cells: 2733
Total number of genes: 19690
FCA_GND8810846
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810846-filtered_feature_bc_matrix-matrix.h5ad
filtered out 15905 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 243 cells that haveless than 150 genes expressed
Total number of cells: 1988
Total number of genes: 17633
FCA_GND8810850
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810850-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16261 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 5 cells that haveless than 150 genes expressed
Total number of cells: 2197
Total number of genes: 17277
FCA_GND8810851
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810851-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11461 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 6 cells that haveless than 150 genes expressed
Total number of cells: 11159
Total number of genes: 22077
FCA_GND8810852
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND8810852-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13797 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 31 cells that haveless than 150 genes expressed
Total number of cells: 5265
Total number of genes: 19741
FCA_GND9331965
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331965-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11304 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 46 cells that haveless than 150 genes expressed
Total number of cells: 10326
Total number of genes: 22234
FCA_GND9331966
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331966-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11973 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 47 cells that haveless than 150 genes expressed
Total number of cells: 27637
Total number of genes: 21565
FCA_GND9331967
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331967-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12509 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 14345
Total number of genes: 21029
FCA_GND9331968
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331968-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11962 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 24 cells that haveless than 150 genes expressed
Total number of cells: 11138
Total number of genes: 21576
FCA_GND9331969
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331969-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16481 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 869
Total number of genes: 17057
FCA_GND9331970
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9331970-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13196 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 12288
Total number of genes: 20342
FCA_GND9295208
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9295208-filtered_feature_bc_matrix-matrix.h5ad
filtered out 16020 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 43 cells that haveless than 150 genes expressed
Total number of cells: 1527
Total number of genes: 17518
FCA_GND9295209
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9295209-filtered_feature_bc_matrix-matrix.h5ad
filtered out 15625 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 45 cells that haveless than 150 genes expressed
Total number of cells: 1754
Total number of genes: 17913
FCA_GND9295210
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9295210-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12397 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


Total number of cells: 10000
Total number of genes: 21141
FCA_GND9295212
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9295212-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12614 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


Total number of cells: 10000
Total number of genes: 20924
FCA_GND9332062
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9332062-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10683 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 82 cells that haveless than 150 genes expressed
Total number of cells: 21584
Total number of genes: 22855
FCA_GND9332064
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9332064-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13879 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 25 cells that haveless than 150 genes expressed
Total number of cells: 1573
Total number of genes: 19659
FCA_GND9332065
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9332065-filtered_feature_bc_matrix-matrix.h5ad
filtered out 14663 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 8 cells that haveless than 150 genes expressed
Total number of cells: 2240
Total number of genes: 18875
FCA_GND9332061
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-FCA_GND9332061-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13213 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 10673
Total number of genes: 20325
HD_F_GON9460406
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9460406-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10700 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 7 cells that haveless than 150 genes expressed
Total number of cells: 10941
Total number of genes: 22838
HD_F_GON9460407
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9460407-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10474 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 4 cells that haveless than 150 genes expressed
Total number of cells: 10077
Total number of genes: 23064
HD_F_GON9460408
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9460408-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11077 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 63 cells that haveless than 150 genes expressed
Total number of cells: 10739
Total number of genes: 22461
HD_F_GON9480063
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9480063-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10778 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 3 cells that haveless than 150 genes expressed
Total number of cells: 8631
Total number of genes: 22760
HD_F_GON9480064
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9480064-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10808 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 8374
Total number of genes: 22730
HD_F_GON9480066
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9480066-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12824 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 1 cells that haveless than 150 genes expressed
Total number of cells: 5971
Total number of genes: 20714
HD_F_GON9479968
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9479968-filtered_feature_bc_matrix-matrix.h5ad
filtered out 14206 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 15 cells that haveless than 150 genes expressed
Total number of cells: 1538
Total number of genes: 19332
HD_F_GON9479969
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9479969-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12506 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 286 cells that haveless than 150 genes expressed
Total number of cells: 4714
Total number of genes: 21032
HD_F_GON9479970
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9479970-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13048 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 7 cells that haveless than 150 genes expressed
Total number of cells: 6280
Total number of genes: 20490
HD_F_GON9699334
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9699334-filtered_feature_bc_matrix-matrix.h5ad
filtered out 9271 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 55 cells that haveless than 150 genes expressed
Total number of cells: 8792
Total number of genes: 24267
HD_F_GON9699332
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9699332-filtered_feature_bc_matrix-matrix.h5ad
filtered out 9818 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 40 cells that haveless than 150 genes expressed
Total number of cells: 8192
Total number of genes: 23720
HD_F_GON9699337
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9699337-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11816 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 58 cells that haveless than 150 genes expressed
Total number of cells: 2029
Total number of genes: 21722
HD_F_GON9883867
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883867-filtered_feature_bc_matrix-matrix.h5ad
filtered out 11305 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 46 cells that haveless than 150 genes expressed
Total number of cells: 7598
Total number of genes: 22233
HD_F_GON9883866
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883866-filtered_feature_bc_matrix-matrix.h5ad
filtered out 13964 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 10 cells that haveless than 150 genes expressed
Total number of cells: 2007
Total number of genes: 19574
HD_F_GON9883862
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883862-filtered_feature_bc_matrix-matrix.h5ad
filtered out 10134 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 35 cells that haveless than 150 genes expressed
Total number of cells: 7562
Total number of genes: 23404
HD_F_GON9883863
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883863-filtered_feature_bc_matrix-matrix.h5ad
filtered out 9946 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 49 cells that haveless than 150 genes expressed
Total number of cells: 8046
Total number of genes: 23592
HD_F_GON9883859
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883859-filtered_feature_bc_matrix-matrix.h5ad
filtered out 14469 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 67 cells that haveless than 150 genes expressed
Total number of cells: 4562
Total number of genes: 19069
HD_F_GON9883868
... reading from cache file cache/nfs-users-nfs_l-lg18-team292-lg18-gonads-data-scRNAseq-FCA-rawdata-HD_F_GON9883868-filtered_feature_bc_matrix-matrix.h5ad
filtered out 12408 genes that are detectedin less than 3 cells


Trying to set attribute `.var` of view, making a copy.


filtered out 9 cells that haveless than 150 genes expressed
Total number of cells: 5216
Total number of genes: 21130


In [None]:
# confirm N samples
# Sanity check: should equal the number of samples in the sample sheet.
print(len(holder))
# merge datasets
# join='outer' and index_unique=None are passed explicitly; per the AnnData.concatenate
# docs this keeps the union of genes and leaves the (already sample-prefixed) cell names as they are.
adata = holder[0].concatenate(holder[1:],join='outer',index_unique=None)
# copy of this matrix in Compressed Sparse Row format
adata.X = adata.X.tocsr()
# Last expression of the cell: displays the summary of the merged object.
adata

53


### QC plots

Plot distributions of the values n_genes, n_counts and percent_mito

In [None]:
# Size of the merged dataset before any dataset-wide filtering.
print(f'Total number of cells: {adata.n_obs:d}')
print(f'Total number of genes: {adata.n_vars:d}')

### Filter cells with few genes
Check number of genes per cell distribution and filter cells accordingly

In [None]:
# Distribution of the number of detected genes per cell.
plt.hist(adata.obs['n_genes'], bins = 100)
plt.xlabel('n_genes (genes detected per cell)')
plt.ylabel('number of cells')
# Mark the cutoff that is actually applied: the filter run right after this plot
# uses min_genes=300, so the guide line is drawn at 300 (it was previously at 200).
plt.axvline(300, linestyle = '--', color = 'red')

In [None]:
# Remove cells with fewer than 300 detected genes; adata is modified by the call (no reassignment).
sc.pp.filter_cells(adata, min_genes=300)

In [None]:
# Dataset size after removing low-complexity cells.
print(f'Total number of cells: {adata.n_obs:d}')
print(f'Total number of genes: {adata.n_vars:d}')

### Filter genes expressed in less than 3 cells

In [None]:
# Gene filter on the merged matrix: drop genes detected in fewer than 3 cells across all samples.
sc.pp.filter_genes(adata, min_cells=3)

In [None]:
# Dataset size after removing rarely detected genes.
print(f'Total number of cells: {adata.n_obs:d}')
print(f'Total number of genes: {adata.n_vars:d}')


### Filter cells with large % mitochondrial genes

In [None]:
# Cumulative histogram of the per-cell mitochondrial fraction (percent_mito is a 0-1 ratio).
plt.hist(adata.obs['percent_mito'], bins = 100, cumulative=True)
# Candidate cutoffs: 10% (red) and 20% (dark red) mitochondrial counts.
plt.axvline(0.1, linestyle = '--', color = 'red')
plt.axvline(0.2, linestyle = '--', color = 'darkred')
# Horizontal guide at 99% of all cells, to read off how many cells each cutoff keeps.
plt.axhline(adata.n_obs*0.99, linestyle = '-', color = 'green')

In [None]:
# Keep cells whose mitochondrial fraction is below 20% (cells at or above 20% are discarded).
# Slicing an AnnData returns a view of the parent object; .copy() turns the subset into an
# independent object so later assignments to adata.obs do not trigger an implicit copy.
adata = adata[adata.obs['percent_mito'] < 0.2, :].copy()

In [None]:
# Dataset size after removing cells with a high mitochondrial fraction.
print(f'Total number of cells: {adata.n_obs:d}')
print(f'Total number of genes: {adata.n_vars:d}')

# Add metadata 

In [None]:
# format some metadata
# PCW as numerical (the 'stage' column is stored as text)
adata.obs['PCW'] = adata.obs['stage'].astype('float').tolist()
# source: L=london; N=newcastle
# Cells from individuals whose ID contains 'Hrv' are labelled 'L', all others 'N'.
# Assigned in one vectorised step; DataFrame.at is a scalar accessor and should not
# be given a boolean mask.
idx = np.array(['Hrv' in i for i in adata.obs['individual']])
adata.obs['sample_source'] = np.where(idx, 'L', 'N')

In [None]:
# Add cluster name and doublet information
# The annotation table is indexed by cell name (first CSV column).
clu_annot = pd.read_csv('figures_manual_annotation/clustering_metadata.csv', header=0, index_col=0)

# Compare the cell names of both tables as multisets (order-insensitive, duplicate-aware).
if collections.Counter(adata.obs.index) == collections.Counter(clu_annot.index):
    print("The lists are identical")
else:
    print("The lists are not identical")
    # The column assignment below aligns on cell name, so cells missing from the
    # annotation table silently receive NaN. Report how many are affected.
    cells_without_annotation = adata.obs.index.difference(clu_annot.index)
    annotations_without_cell = clu_annot.index.difference(adata.obs.index)
    print('Cells in adata without annotation: {:d}'.format(len(cells_without_annotation)))
    print('Annotated cells absent from adata: {:d}'.format(len(annotations_without_cell)))

# Columns copied from the annotation table into the per-cell annotations.
vars2import = ['clusters_manual','old_clusters','louvain', 'is_doublet', 'scrublet_cluster_score', 'scrublet_score']
for var in vars2import:
    adata.obs[var] = clu_annot[var]

In [None]:
# Preview the first rows of the per-cell annotations to check the imported columns.
adata.obs.head()

# Save

In [None]:
# del adata.obs['location']
# Drop the 'clusters' column before saving. No cell in this notebook creates it, so an
# unconditional `del` raises KeyError on a fresh Restart & Run All; only delete when present.
if 'clusters' in adata.obs.columns:
    del adata.obs['clusters']

In [None]:
# Write the filtered, annotated object to disk (.h5ad).
# NOTE(review): absolute, user-specific output path — consider deriving it from a configurable directory.
adata.write('/nfs/team292/lg18/with_valentina/FCA-M5-annotatedCluster4Seurat.h5ad')