# Single-Cell Filter QC Report

In [None]:
# Import packages
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import warnings

## Read Data

In [None]:
# Load the dataset into `adata`, the object every later cell reads from.
# NOTE(review): FILE is not assigned in any visible cell -- presumably it is
# injected by whatever executes this notebook (e.g. a parameters cell); verify.
input_path = FILE
adata = sc.read_h5ad(filename=input_path)

## Basic Statistics

In [None]:
# Column-wise summaries of the expression matrix (reduction over axis 0).
# NOTE(review): the variable names treat each column as a gene, i.e. they
# assume adata.X is laid out as cells x genes -- confirm for this dataset.
expression = adata.X
n_counts_per_gene = np.sum(expression, axis=0)      # total signal per column
n_cells_per_gene = np.sum(expression > 0, axis=0)   # non-zero entries per column

# Show info: report the min/max of each per-gene summary.
for label, per_gene in (
    ("Number of counts (in the dataset units) per gene:", n_counts_per_gene),
    ("Number of cells in which each gene is detected:", n_cells_per_gene),
):
    print(label, per_gene.min(), " - ", per_gene.max())

## Diagnostic Plots (pre-filtering)

### Number of genes expressed per cell

In [None]:
# The rest of this cell needs the per-cell 'n_genes' column; stop early if
# it is missing from adata.obs.
if 'n_genes' not in adata.obs.keys():
    raise Exception("n_genes not found")

# Display parameters for the zoomed panels.
x_lowerbound = 1500   # right edge of the "lower bound" zoom (panel 2)
x_upperbound = 2000   # left edge of the "upper bound" zoom (panel 3)
nbins = 100

x = adata.obs['n_genes']

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 6), dpi=150, sharey=True)

# The same histogram is drawn on all three panels; only the x-range shown
# differs between them (set below).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases --
# confirm the seaborn version used for this report still provides it.
panel_titles = ('n_genes', 'n_genes, lower bound', 'n_genes, upper bound')
for panel, panel_title in zip((ax1, ax2, ax3), panel_titles):
    sns.distplot(x, ax=panel, norm_hist=True, bins=nbins)
    panel.title.set_text(panel_title)

# Panel 2 zooms on the low end, panel 3 on the high end of the distribution.
ax2.set_xlim(0, x_lowerbound)
ax3.set_xlim(x_upperbound, x.max())

fig.tight_layout()

### Percentage of mitochondrial reads per cell

In [None]:
# Three views of the per-cell 'percent_mito' distribution: full range plus
# two zoomed windows. Skipped with a warning when the column is absent.
if 'percent_mito' not in adata.obs.keys():
    warnings.warn("Percentage of mitochondrial genes expressed in cells not calculated")
else:
    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 6), dpi=150, sharey=True)

    x = adata.obs['percent_mito']
    x_lowerbound = [0.0, 0.07 ]    # [left, right] x-limits of the low-end zoom
    x_upperbound = [ 0.10, 0.3 ]   # [left, right] x-limits of the high-end zoom
    nbins=100

    # The zoomed panels get more bins: nbins divided by the window width.
    # NOTE(review): presumably this keeps roughly `nbins` bins visible inside
    # each window if the values span 0-1 -- confirm the units of percent_mito.
    bins_low = int(nbins/(x_lowerbound[1]-x_lowerbound[0]))
    bins_high = int(nbins/(x_upperbound[1]-x_upperbound[0]))

    # NOTE(review): sns.distplot is deprecated in recent seaborn releases --
    # confirm the seaborn version used for this report still provides it.
    panels = (
        (ax1, nbins, 'percent_mito'),
        (ax2, bins_low, 'percent_mito, lower bound'),
        (ax3, bins_high, 'percent_mito, upper bound'),
    )
    for panel, panel_bins, panel_title in panels:
        sns.distplot(x, ax=panel, norm_hist=True, bins=panel_bins)
        panel.title.set_text(panel_title)

    # Restrict the zoomed panels to their windows.
    ax2.set_xlim(x_lowerbound[0], x_lowerbound[1])
    ax3.set_xlim(x_upperbound[0], x_upperbound[1] )

    fig.tight_layout()

### Three-panel summary plots

#### Distributions of number of genes, number of counts, and percent of mitochondrial genes

In [None]:
# One-row summary figure: distributions of n_genes, n_counts and (when
# available) percent_mito. The y-axes are independent (sharey=False).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases --
# confirm the seaborn version used for this report still provides it.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(16, 6), dpi=150, sharey=False)

sns.distplot( adata.obs['n_genes'], ax=ax1, norm_hist=True, bins=100)
ax1.title.set_text('n_genes')

sns.distplot( adata.obs['n_counts'], ax=ax2, norm_hist=True, bins=100)
ax2.title.set_text('n_counts')

if 'percent_mito' in adata.obs.keys():
    # The column exists, so plot it. (A "not calculated" message used to be
    # printed here as well, contradicting the plot right below it; that
    # message belongs only in the else branch.)
    sns.distplot( adata.obs['percent_mito'], ax=ax3, norm_hist=True, bins=100)
    ax3.title.set_text('percent_mito')
else:
    # Third panel is left empty when percent_mito is unavailable.
    warnings.warn("Percentage of mitochondrial genes expressed in cells not calculated")

fig.tight_layout()

#### Violin plots of number of genes, number of counts, and percent of mitochondrial genes

In [None]:
# Candidate QC metrics; only those actually present as columns of adata.obs
# are passed on to the violin plot.
metrics = ['n_genes', 'n_counts', 'percent_mito']
is_available = np.isin(metrics, adata.obs.keys())
available_metrics = np.array(metrics)[is_available]

sc.pl.violin(adata, available_metrics, jitter=0.4, multi_panel=True)

#### Scatter plot of number of genes vs. number of counts

In [None]:
# n_counts vs n_genes per cell; points are colored by percent_mito when that
# column is available, otherwise the plot is drawn uncolored and a warning
# is emitted afterwards.
has_percent_mito = 'percent_mito' in adata.obs.keys()

scatter_options = {'x': 'n_counts', 'y': 'n_genes'}
if has_percent_mito:
    scatter_options['color'] = 'percent_mito'

sc.pl.scatter(adata, **scatter_options)

if not has_percent_mito:
    warnings.warn("Percentage of mitochondrial genes expressed in cells not calculated")