### Reformat selected `adata.obs` columns to match the conventions used in the other AnnData files

In [1]:
import scanpy as sc
import numpy as np
from collections import Counter

In [2]:
# Read the fetal ENCODE AnnData file and show its summary repr.
input_h5ad = "02_fetal_ENCODE.h5ad"
adata = sc.read_h5ad(input_h5ad)
adata

AnnData object with n_obs × n_vars = 74029 × 58780
    obs: 'donor_id', 'age', 'sex', 'region', 'study', 'technology', 'cell_or_nuclei', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt', 'total_counts_ribo', 'log1p_total_counts_ribo', 'pct_counts_ribo', 'total_counts_hb', 'log1p_total_counts_hb', 'pct_counts_hb', 'n_genes', 'doublet_score', 'predicted_doublet', 'leiden', 'cell_type', 'consistent_cell_type'
    var: 'mt', 'ribo', 'hb', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection'
    uns: 'consistent_cell_type_colors', 'dendrogram_leiden', 'donor_id_colors', 'hvg', 'leiden', 'l

### Convert age from gestational days to gestational weeks

In [3]:
# Divide the age column by 7 (days -> weeks) and round to whole weeks.
# NOTE: the column is overwritten in place, so running this cell twice on the
# same `adata` divides by 7 a second time -- reload the file before re-running.
age_in_weeks = adata.obs["age"].div(7).round()
adata.obs["age"] = age_in_weeks
adata.obs["age"]

index
ENCFF802AQC:AAACAGCCAAGCTACC    13.0
ENCFF802AQC:AAACAGCCACATTGCA    13.0
ENCFF802AQC:AAACATGCAATCCCTT    13.0
ENCFF802AQC:AAACATGCAGCAACAG    13.0
ENCFF802AQC:AAACATGCAGGCGAGT    13.0
                                ... 
ENCFF684YRB:TTTGTGTTCTACCTGC    10.0
ENCFF684YRB:TTTGTTGGTACTAAAC    10.0
ENCFF684YRB:TTTGTTGGTCGTAATG    10.0
ENCFF684YRB:TTTGTTGGTTAGAGGG    10.0
ENCFF684YRB:TTTGTTGGTTCATTTG    10.0
Name: age, Length: 74029, dtype: float64

### Recode sex as `male` / `female` instead of `M` / `F`

In [4]:
# Expand the single-letter sex codes to full words. The column is cast to
# plain strings first; any value other than "M" or "F" keeps its string form.
sex_labels = {"M": "male", "F": "female"}
adata.obs["sex"] = adata.obs["sex"].astype(str).replace(sex_labels)

# Tally cells per label as a quick check of the recoding.
Counter(adata.obs["sex"])

Counter({'female': 53960, 'male': 20069})

In [5]:
# Save the reformatted object to a new .h5ad file (the input file is left untouched).
adata.write_h5ad("03_reformatted_ENCODE_fetal.h5ad")

In [6]:
# Number of cells contributed by each donor_id value.
Counter(adata.obs["donor_id"])

Counter({'ENCFF775ANN': 16642,
         'ENCFF248EWR': 16230,
         'ENCFF727JRO': 9524,
         'ENCFF776DQR': 7612,
         'ENCFF684YRB': 7442,
         'ENCFF849ALE': 7424,
         'ENCFF805YRY': 5316,
         'ENCFF802AQC': 3839})