### Gene dynamics with snRNA/ATAC-seq (10X multiome) samples

In [1]:
import numpy as np
import scanpy as sc 
import pandas as pd
import anndata
sc.settings.figdir = '/home/jovyan/MULTIOME_july2021/figures_germ/'

In [2]:
sc.settings.set_figure_params(dpi=80)  # low dpi (dots per inch) yields small inline figures

In [3]:
import rpy2.rinterface_lib.callbacks
import logging

In [4]:
# Ignore R warning messages
#Note: this can be commented out to get more verbose R output
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

In [5]:
import anndata2ri
anndata2ri.activate()

In [6]:
%load_ext rpy2.ipython

In [7]:
%%R
## Install dependencies
#devtools::install_github("aertslab/RcisTarget")
#devtools::install_github("aertslab/AUCell")
        
#devtools::install_github("aertslab/cisTopic")
library(cisTopic)
library(Matrix)
library(Seurat)
library(dplyr)
library(data.table)
library(GenomicRanges)
library(EnsDb.Hsapiens.v86)
#library(cicero)
library(Signac)
#library(clustree)
library(Matrix)
library(reticulate)
#if (!requireNamespace("remotes", quietly = TRUE)) {
#  install.packages("remotes")
#}
#remotes::install_github("mojaveazure/seurat-disk")

library(SeuratDisk)

#### Load ATAC and RNA from 10X multiome samples

### RNA 

In [38]:
rnas = []
donors = ['Hrv15', 'Hrv39', 'Hrv3', 'Hrv41', 'Hrv58', 'Hrv65', 'Hrv91', 'Hrv92', 'Hrv93']
for i in donors: 
    print(i)
    rna_donor = sc.read('/nfs/team292/lg18/with_valentina/gonadsV2_revision/multiomics_rna_counts_denoised_' + i + '.h5ad')
    rnas.append(rna_donor)

Hrv15
Hrv39
Hrv3
Hrv41
Hrv58
Hrv65
Hrv91
Hrv92
Hrv93


In [39]:
# confirm N samples
print(len(rnas))
# merge datasets
rna = rnas[0].concatenate(rnas[1:],join='outer',index_unique=None)
# copy of this matrix in Compressed Sparse Row format
rna.X = rna.X.tocsr()
rna

9


AnnData object with n_obs × n_vars = 73218 × 33997
    obs: 'n_genes', 'sample', 'donor', 'Library_ATAC', 'Library_RNA', 'iRods_path', 'percent_mito', 'n_counts', 'batch', 'scrublet_score', 'scrublet_cluster_score', 'zscore', 'bh_pval', 'bonf_pval'
    var: 'gene_ids-10', 'feature_types-10', 'gene_ids-11', 'feature_types-11', 'gene_ids-12', 'feature_types-12', 'gene_ids-13', 'feature_types-13', 'gene_ids-9', 'feature_types-9', 'n_cells', 'gene_ids-0', 'feature_types-0', 'n_cells-0-0', 'n_cells-1-0', 'n_cells-10-0', 'n_cells-11-0', 'n_cells-12-0', 'n_cells-13-0', 'n_cells-2-0', 'n_cells-3-0', 'n_cells-4-0', 'n_cells-5-0', 'n_cells-6-0', 'n_cells-7-0', 'n_cells-8-0', 'n_cells-9-0', 'gene_ids-1', 'feature_types-1', 'n_cells-0-1', 'n_cells-1-1', 'n_cells-10-1', 'n_cells-11-1', 'n_cells-12-1', 'n_cells-13-1', 'n_cells-2-1', 'n_cells-3-1', 'n_cells-4-1', 'n_cells-5-1', 'n_cells-6-1', 'n_cells-7-1', 'n_cells-8-1', 'n_cells-9-1', 'gene_ids-2', 'feature_types-2', 'n_cells-0-2', 'n_cells-1-2', '

#### 1. Import annotations for germ cells of both males and females

In [57]:
path_to_multiome = '/nfs/team292/lg18/with_valentina/gonadsV2_revision/'
germcells_females = pd.read_csv(path_to_multiome + 'multiomics_female_germcells.csv', index_col = 0)
germcells_females.head()

Unnamed: 0,n_genes,sample,donor,Library_ATAC,Library_RNA,iRods_path,percent_mito,n_counts,batch,scrublet_score,...,bonf_pval,pcw,lineage,celltype_samplespecific,S_score,G2M_score,phase,leiden,leiden_R,celltype
HCA_F_GON10535495_AAACCAACACGAATTT,5728,HCA_F_GON10535495,Hrv92,HCA_F_GON10535399,HCA_F_GON10535495,/seq/illumina/cellranger-arc/cellranger-arc101...,0.014532,4735.2485,0,0.068746,...,1.0,8.6,GermCells,,0.237842,0.087936,S,0,0,PGC
HCA_F_GON10535495_AACCCGCAGTTTGAGC,5456,HCA_F_GON10535495,Hrv92,HCA_F_GON10535399,HCA_F_GON10535495,/seq/illumina/cellranger-arc/cellranger-arc101...,0.017976,4645.233,0,0.043975,...,1.0,8.6,GermCells,,0.037234,-0.421033,S,0,0,PGC
HCA_F_GON10535495_AACGACAAGCGGCTGT,5901,HCA_F_GON10535495,Hrv92,HCA_F_GON10535399,HCA_F_GON10535495,/seq/illumina/cellranger-arc/cellranger-arc101...,0.01817,4060.9746,0,0.090307,...,1.0,8.6,GermCells,,0.114108,4.491824,G2M,0,0,PGC
HCA_F_GON10535495_AACTTAGTCTTGTCCA,6781,HCA_F_GON10535495,Hrv92,HCA_F_GON10535399,HCA_F_GON10535495,/seq/illumina/cellranger-arc/cellranger-arc101...,0.020457,4499.4067,0,0.075205,...,1.0,8.6,GermCells,,0.00228,6.15881,G2M,0,0,PGC
HCA_F_GON10535495_AAGACCAAGCACCACA,4881,HCA_F_GON10535495,Hrv92,HCA_F_GON10535399,HCA_F_GON10535495,/seq/illumina/cellranger-arc/cellranger-arc101...,0.015051,4630.9404,0,0.04601,...,1.0,8.6,GermCells,,-0.099797,-0.189805,G1,0,0,PGC


In [58]:
germcells_females['Library_ATAC'].value_counts()

FCA_GND10288180      1171
HCA_F_GON10687818    1091
FCA_GND10288176       942
FCA_GND10288177       923
FCA_GND10288179       781
FCA_GND10288178       533
HCA_F_GON10535399     252
Name: Library_ATAC, dtype: int64

In [59]:
germcells_male = pd.read_csv(path_to_multiome + 'multiomics_male_germcells.csv', index_col = 0)
germcells_male.head()

Unnamed: 0,n_genes,sample,donor,Library_ATAC,Library_RNA,iRods_path,percent_mito,n_counts,batch,scrublet_score,...,bh_pval,bonf_pval,pcw,lineage,celltype_samplespecific,S_score,G2M_score,phase,leiden,celltype
HCA_F_GON10713286_AAACGCGCACTGGCCA,5701,HCA_F_GON10713286,Hrv41,HCA_F_GON10687820,HCA_F_GON10713286,/seq/illumina/cellranger-arc/cellranger-arc101...,0.001894,18223.0,0,0.03458,...,0.901686,1.0,8.8,GermCells,,1.146592,0.513035,S,2,PGC_mitotic
HCA_F_GON10713286_AACAGCAAGAGGAGGA,6154,HCA_F_GON10713286,Hrv41,HCA_F_GON10687820,HCA_F_GON10713286,/seq/illumina/cellranger-arc/cellranger-arc101...,0.002846,20717.0,0,0.068073,...,0.901686,1.0,8.8,GermCells,,0.037504,-0.186892,S,0,PGC
HCA_F_GON10713286_AACAGCAAGCTAAAGG,5009,HCA_F_GON10713286,Hrv41,HCA_F_GON10687820,HCA_F_GON10713286,/seq/illumina/cellranger-arc/cellranger-arc101...,0.001948,15032.0,0,0.04878,...,0.901686,1.0,8.8,GermCells,,-0.34765,-0.672939,G1,0,PGC
HCA_F_GON10713286_AACAGCAAGTCAATCA,8263,HCA_F_GON10713286,Hrv41,HCA_F_GON10687820,HCA_F_GON10713286,/seq/illumina/cellranger-arc/cellranger-arc101...,0.003338,38623.0,0,0.305785,...,3.7e-05,0.010037,8.8,GermCells,,0.018986,3.420874,G2M,2,PGC_mitotic
HCA_F_GON10713286_AACATCATCCACCCTG,5685,HCA_F_GON10713286,Hrv41,HCA_F_GON10687820,HCA_F_GON10713286,/seq/illumina/cellranger-arc/cellranger-arc101...,0.001917,17283.0,0,0.031335,...,0.901686,1.0,8.8,GermCells,,0.281357,-0.657518,S,0,PGC


In [60]:
germcells = germcells_females.append(germcells_male)
germcells['Library_ATAC'].value_counts(dropna = False)

FCA_GND10288180      1171
HCA_F_GON10687818    1091
FCA_GND10288176       942
FCA_GND10288177       923
FCA_GND10288179       781
FCA_GND10288178       533
HCA_F_GON10687819     475
HCA_F_GON10535399     252
HCA_F_GON10687820     194
HD_F_GON9525612        38
HD_F_GON9525611        32
Name: Library_ATAC, dtype: int64

In [61]:
code_correspondence = {'FCA_GND10288180' : '05', 'HCA_F_GON10687818' : '15', 'FCA_GND10288176' : '01', 
                      'FCA_GND10288177' : '02', 'FCA_GND10288179' : '04', 'FCA_GND10288178' : '03', 
                      'HCA_F_GON10687819' : '16', 'HCA_F_GON10535399' : '14', 'HCA_F_GON10687820' : '17', 
                      'HD_F_GON9525612' : '22', 'HD_F_GON9525611' : '21'}

In [62]:
germcells['ATAC_code'] = germcells['Library_ATAC'].map(code_correspondence)
germcells['ATAC_code'].value_counts(dropna = False)

05    1171
15    1091
01     942
02     923
04     781
03     533
16     475
14     252
17     194
22      38
21      32
Name: ATAC_code, dtype: int64

In [63]:
germcells['celltype'].value_counts(dropna = False)

oogonia_STRA8       2215
PGC                 1722
oogonia_meiotic     1178
preOocyte            311
Doublet              274
preSpermatogonia     221
oocyte               162
PGC_mitotic          152
GC                   105
lowQC                 92
Name: celltype, dtype: int64

In [64]:
germcells['celltype'] = ['pre_spermatogonia' if i == 'preSpermatogonia' else i for i in germcells['celltype']]
germcells['celltype'] = ['PGC' if i == 'PGC_mitotic' else i for i in germcells['celltype']]
germcells['celltype'] = ['pre_oocyte' if i == 'preOocyte' else i for i in germcells['celltype']]

germcells = germcells[germcells['celltype'] != 'Doublet']
germcells = germcells[germcells['celltype'] != 'lowQC']
germcells['celltype'].value_counts(dropna = False)

oogonia_STRA8        2215
PGC                  1874
oogonia_meiotic      1178
pre_oocyte            311
pre_spermatogonia     221
oocyte                162
GC                    105
Name: celltype, dtype: int64

In [66]:
rna.obs.head()

Unnamed: 0,n_genes,sample,donor,Library_ATAC,Library_RNA,iRods_path,percent_mito,n_counts,batch,scrublet_score,scrublet_cluster_score,zscore,bh_pval,bonf_pval,germcell_RNA
HD_F_GON9525419_AAACAGCCACCTCAGG,4816,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000665,12037.0,0,0.08,0.130409,0.409733,0.784586,1.0,
HD_F_GON9525419_AAACATGCAGGCTAGA,4581,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000544,12873.0,0,0.140127,0.1253,0.345647,0.784586,1.0,
HD_F_GON9525419_AAACCAACATACTCCT,1754,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.0,2951.0,0,0.050967,0.050967,-0.586771,0.784586,1.0,
HD_F_GON9525419_AAACCAACATGGTTAT,1880,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000713,2807.0,0,0.164286,0.151515,0.674491,0.776981,1.0,
HD_F_GON9525419_AAACCGAAGCGTGCAC,3804,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000357,8394.0,0,0.164286,0.164286,0.834682,0.776981,1.0,


In [67]:
germcells.donor.value_counts()

Hrv65    1907
Hrv58    1678
Hrv91    1038
Hrv39     524
Hrv3      470
Hrv92     202
Hrv41     194
Hrv15      53
Name: donor, dtype: int64

In [68]:
rna.obs.donor.value_counts()

Hrv93    19897
Hrv92    12190
Hrv65     7423
Hrv41     6774
Hrv39     6677
Hrv3      6304
Hrv58     6269
Hrv91     4862
Hrv15     2822
Name: donor, dtype: int64

#### 2. Add germ cell annotation to RNA object

In [69]:
rna.obs['germcell_RNA'] = rna.obs_names.map(germcells['celltype'].to_dict())

In [70]:
rna.obs['germcell_RNA'].value_counts(dropna = False)

NaN                  67152
oogonia_STRA8         2215
PGC                   1874
oogonia_meiotic       1178
pre_oocyte             311
pre_spermatogonia      221
oocyte                 162
GC                     105
Name: germcell_RNA, dtype: int64

In [71]:
rna.obs['germcell_RNA'] = rna.obs['germcell_RNA'].astype(str)
rna = rna[[i not in ['nan'] for i in rna.obs['germcell_RNA']]]

  res = method(*args, **kwargs)


In [72]:
rna

View of AnnData object with n_obs × n_vars = 6066 × 33997
    obs: 'n_genes', 'sample', 'donor', 'Library_ATAC', 'Library_RNA', 'iRods_path', 'percent_mito', 'n_counts', 'batch', 'scrublet_score', 'scrublet_cluster_score', 'zscore', 'bh_pval', 'bonf_pval', 'germcell_RNA'
    var: 'gene_ids-10', 'feature_types-10', 'gene_ids-11', 'feature_types-11', 'gene_ids-12', 'feature_types-12', 'gene_ids-13', 'feature_types-13', 'gene_ids-9', 'feature_types-9', 'n_cells', 'gene_ids-0', 'feature_types-0', 'n_cells-0-0', 'n_cells-1-0', 'n_cells-10-0', 'n_cells-11-0', 'n_cells-12-0', 'n_cells-13-0', 'n_cells-2-0', 'n_cells-3-0', 'n_cells-4-0', 'n_cells-5-0', 'n_cells-6-0', 'n_cells-7-0', 'n_cells-8-0', 'n_cells-9-0', 'gene_ids-1', 'feature_types-1', 'n_cells-0-1', 'n_cells-1-1', 'n_cells-10-1', 'n_cells-11-1', 'n_cells-12-1', 'n_cells-13-1', 'n_cells-2-1', 'n_cells-3-1', 'n_cells-4-1', 'n_cells-5-1', 'n_cells-6-1', 'n_cells-7-1', 'n_cells-8-1', 'n_cells-9-1', 'gene_ids-2', 'feature_types-2', 'n_cells

In [73]:
rna.obs['ATAC_code'] = rna.obs_names.map(germcells['ATAC_code'].to_dict())

Trying to set attribute `.obs` of view, copying.


In [74]:
rna.obs.head()

Unnamed: 0,n_genes,sample,donor,Library_ATAC,Library_RNA,iRods_path,percent_mito,n_counts,batch,scrublet_score,scrublet_cluster_score,zscore,bh_pval,bonf_pval,germcell_RNA,ATAC_code
HD_F_GON9525419_AAACCGGCAGCAACCT,8129,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000697,31547.0,0,0.140127,0.224911,1.59516,0.776981,1.0,PGC,21
HD_F_GON9525419_ACTGAAACATCCCGCT,5454,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000193,15538.0,0,0.091346,0.08545,-0.154214,0.784586,1.0,PGC,21
HD_F_GON9525419_AGGTTGCGTGGAAGGC,3936,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000527,7590.0,0,0.08,0.08545,-0.154214,0.784586,1.0,PGC,21
HD_F_GON9525419_ATAGCATGTGTTGCAA,4524,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.000104,9627.0,0,0.091346,0.127816,0.377212,0.784586,1.0,GC,21
HD_F_GON9525419_CAGCCTAAGAGGCTAA,6044,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,/seq/illumina/cellranger-arc/cellranger-arc101...,0.001694,15938.0,0,0.08545,0.08545,-0.154214,0.784586,1.0,PGC,21


In [75]:
for i in rna.var.columns: 
    if i != 'gene_ids-0':
        del rna.var[i]

In [76]:
for i in rna.obs.columns: 
    if i not in ['sample', 'donor', 'Library_ATAC', 'Library_RNA', 'germcell_RNA', 'ATAC_code']:
        del rna.obs[i]

In [77]:
rna.X

<6066x33997 sparse matrix of type '<class 'numpy.float32'>'
	with 31013008 stored elements in Compressed Sparse Row format>

In [78]:
rna.write('/nfs/team292/vl6/my_MULTIOME_dir/germcells_july2021/germcells__RNAseq_multiome.h5ad')

... storing 'sample' as categorical
... storing 'donor' as categorical
... storing 'Library_ATAC' as categorical
... storing 'Library_RNA' as categorical
... storing 'germcell_RNA' as categorical
... storing 'ATAC_code' as categorical


In [79]:
np.isnan(rna.X.toarray()).sum()

0

#### 3. Make Seurat object 

In [80]:
outdir = "/nfs/team292/vl6/my_MULTIOME_dir/germcells_july2021/"
experiment_prefix = 'germcells_'

In [81]:
%%R -i outdir -i experiment_prefix -o RNAseq
Convert(paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5ad"),  
        dest = paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5seurat"), overwrite = TRUE, verbose = TRUE)

RNAseq <- LoadH5Seurat(paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5seurat"))
RNAseq

An object of class Seurat 
33997 features across 6066 samples within 1 assay 
Active assay: RNA (33997 features, 0 variable features)


In [82]:
%%R 

print(table(RNAseq@meta.data$germcell_RNA))


               GC               PGC            oocyte     oogonia_STRA8 
              105              1874               162              2215 
  oogonia_meiotic        pre_oocyte pre_spermatogonia 
             1178               311               221 


### ATAC

In [83]:
%%R -i outdir -i experiment_prefix

ATAC_Seurat <- readRDS(file = paste0(outdir, experiment_prefix, "_full.rds"))

In [84]:
%%R 

print(table(ATAC_Seurat@meta.data$cell_type))


              PGC                GC     oogonia_STRA8   oogonia_meiotic 
             3016               903              2467              1466 
       pre_oocyte            oocyte pre_spermatogonia 
              148               442               459 


#### 1. Select barcodes from multiomic samples 

In [85]:
rna.obs['transfer_barcode'] = [i.split('_')[-1] for i in rna.obs_names]

In [86]:
rna.obs['transfer_barcode'] = rna.obs['transfer_barcode'].astype(str)

In [87]:
rna.obs['ATAC_code'] = rna.obs['ATAC_code'].astype(str)

In [88]:
rna.obs['transfer_barcode2'] = rna.obs['ATAC_code'] + '-' + rna.obs['transfer_barcode'] + '-1'

In [89]:
rna.obs.head()

Unnamed: 0,sample,donor,Library_ATAC,Library_RNA,germcell_RNA,ATAC_code,transfer_barcode,transfer_barcode2
HD_F_GON9525419_AAACCGGCAGCAACCT,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,AAACCGGCAGCAACCT,21-AAACCGGCAGCAACCT-1
HD_F_GON9525419_ACTGAAACATCCCGCT,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,ACTGAAACATCCCGCT,21-ACTGAAACATCCCGCT-1
HD_F_GON9525419_AGGTTGCGTGGAAGGC,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,AGGTTGCGTGGAAGGC,21-AGGTTGCGTGGAAGGC-1
HD_F_GON9525419_ATAGCATGTGTTGCAA,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,GC,21,ATAGCATGTGTTGCAA,21-ATAGCATGTGTTGCAA-1
HD_F_GON9525419_CAGCCTAAGAGGCTAA,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,CAGCCTAAGAGGCTAA,21-CAGCCTAAGAGGCTAA-1


In [90]:
to_keep = rna.obs['transfer_barcode2'].to_list()

In [91]:
len(to_keep)

6066

In [92]:
%%R -i to_keep

to_keep_vector = unlist(to_keep, recursive = TRUE, use.names = TRUE)
print(head(to_keep_vector))
ATAC_Seurat <- subset(ATAC_Seurat, cells = to_keep_vector)
ATAC_Seurat

[1] "21-AAACCGGCAGCAACCT-1" "21-ACTGAAACATCCCGCT-1" "21-AGGTTGCGTGGAAGGC-1"
[4] "21-ATAGCATGTGTTGCAA-1" "21-CAGCCTAAGAGGCTAA-1" "21-CATAGGTTCACAGCCA-1"
An object of class Seurat 
120127 features across 5605 samples within 1 assay 
Active assay: peaks (120127 features, 0 variable features)
 3 dimensional reductions calculated: cisTopics, umap, harmony


In [93]:
%%R 

print(table(ATAC_Seurat@meta.data$cell_type))
print(table(ATAC_Seurat@meta.data$individual))
print(table(RNAseq@meta.data$donor))
print(table(RNAseq@meta.data$germcell))


              PGC                GC     oogonia_STRA8   oogonia_meiotic 
             1692               524              1737              1005 
       pre_oocyte            oocyte pre_spermatogonia 
              101               342               204 

  F81  Hrv3 Hrv10 Hrv13 Hrv15 Hrv17 Hrv18 Hrv21 Hrv39 Hrv41 Hrv49 Hrv50 Hrv54 
    0   377     0     0    47     0     0     0   511   139     0     0     0 
Hrv55 Hrv58 Hrv59 Hrv65 Hrv91 Hrv92 
    0  1532     0  1834   973   192 

 Hrv3 Hrv15 Hrv39 Hrv41 Hrv58 Hrv65 Hrv91 Hrv92 
  470    53   524   194  1678  1907  1038   202 

               GC               PGC            oocyte     oogonia_STRA8 
              105              1874               162              2215 
  oogonia_meiotic        pre_oocyte pre_spermatogonia 
             1178               311               221 


In [94]:
%%R -i outdir -i experiment_prefix

saveRDS(ATAC_Seurat, file = paste0(outdir, experiment_prefix, "_ATAC_multiome.rds"))

In [95]:
%%R -o to_keep2

to_keep2 = colnames(ATAC_Seurat)

In [96]:
len(to_keep2)

5605

In [97]:
to_keep2_list = []
for i in range(len(to_keep2)):
    to_keep2_list.append(to_keep2[i])

In [98]:
len(to_keep2_list)

5605

In [99]:
rna = rna[[i in to_keep2_list for i in rna.obs['transfer_barcode2']]]
rna

  res = method(*args, **kwargs)


View of AnnData object with n_obs × n_vars = 5605 × 33997
    obs: 'sample', 'donor', 'Library_ATAC', 'Library_RNA', 'germcell_RNA', 'ATAC_code', 'transfer_barcode', 'transfer_barcode2'
    var: 'gene_ids-0'

In [100]:
rna.obs['original_barcode'] = rna.obs_names
rna.obs_names = rna.obs['transfer_barcode2']
rna.obs.head()

Trying to set attribute `.obs` of view, copying.


Unnamed: 0_level_0,sample,donor,Library_ATAC,Library_RNA,germcell_RNA,ATAC_code,transfer_barcode,transfer_barcode2,original_barcode
transfer_barcode2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
21-ACTGAAACATCCCGCT-1,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,ACTGAAACATCCCGCT,21-ACTGAAACATCCCGCT-1,HD_F_GON9525419_ACTGAAACATCCCGCT
21-AGGTTGCGTGGAAGGC-1,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,AGGTTGCGTGGAAGGC,21-AGGTTGCGTGGAAGGC-1,HD_F_GON9525419_AGGTTGCGTGGAAGGC
21-ATAGCATGTGTTGCAA-1,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,GC,21,ATAGCATGTGTTGCAA,21-ATAGCATGTGTTGCAA-1,HD_F_GON9525419_ATAGCATGTGTTGCAA
21-CAGCCTAAGAGGCTAA-1,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,PGC,21,CAGCCTAAGAGGCTAA,21-CAGCCTAAGAGGCTAA-1,HD_F_GON9525419_CAGCCTAAGAGGCTAA
21-CATAGGTTCACAGCCA-1,HD_F_GON9525419,Hrv15,HD_F_GON9525611,HD_F_GON9525419,GC,21,CATAGGTTCACAGCCA,21-CATAGGTTCACAGCCA-1,HD_F_GON9525419_CATAGGTTCACAGCCA


In [101]:
del rna.obs['transfer_barcode2']

In [102]:
rna.write('/nfs/team292/vl6/my_MULTIOME_dir/germcells_july2021/germcells__RNAseq_multiome.h5ad')

... storing 'ATAC_code' as categorical
... storing 'transfer_barcode' as categorical


In [103]:
%%R -i outdir -i experiment_prefix -o RNAseq
Convert(paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5ad"),  
        dest = paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5seurat"), overwrite = TRUE, verbose = TRUE)

RNAseq <- LoadH5Seurat(paste0(outdir, experiment_prefix, "_RNAseq_multiome.h5seurat"))
RNAseq

An object of class Seurat 
33997 features across 5605 samples within 1 assay 
Active assay: RNA (33997 features, 0 variable features)
