# RNA-seq expression - Code

### Perform data quality summary via fastqc

In [None]:
# Run the `fastqc` workflow defined in RNA_calling.ipynb for the samples named
# in the fastq list file, using the rna_quantification container image.
# NOTE(review): every path is absolute under /home/ziningqi0913 — adapt these
# to your own environment before running.
sos run /home/ziningqi0913/xqtl-pipeline/pipeline/RNA_calling.ipynb fastqc \
    --cwd /home/ziningqi0913/output/rnaseq/fastqc \
    --samples /home/ziningqi0913/fastq/xqtl_protocol_data.fastqlist \
    --data-dir /home/ziningqi0913/fastq \
    --container /home/ziningqi0913/containers/rna_quantification.sif \
    --gtf /home/ziningqi0913/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.ERCC.gtf


### Read alignment via STAR and QC via Picard

In [None]:
# Run the `STAR_output` workflow from RNA_calling.ipynb with the STAR index,
# GTF annotation, reference FASTA and refFlat file listed below.
# NOTE(review): unlike the other cells in this notebook, every path here is
# relative (and the sample list lives under ROSMAP_data/RNASeq/fastq), so the
# command depends on the directory it is launched from — confirm this is intended.
sos run pipeline/RNA_calling.ipynb STAR_output \
    --cwd output/rnaseq \
    --samples ROSMAP_data/RNASeq/fastq/xqtl_protocol_data.fastqlist \
    --data-dir ROSMAP_data/RNASeq/fastq \
    --STAR-index reference_data/STAR_Index/ \
    --gtf reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.ERCC.gtf \
    --container containers/rna_quantification.sif \
    --reference-fasta reference_data/GRCh38_full_analysis_set_plus_decoy_hla.noALT_noHLA_noDecoy_ERCC.fasta \
    --ref-flat reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.ERCC.ref.flat

### Call gene-level RNA expression via rnaseqc
The following steps generate the gene expression count table.

In [None]:
# Run the `rnaseqc_call` workflow from RNA_calling.ipynb, passing the BAM list
# file together with the collapsed gene-level GTF and the reference FASTA.
# One option per line so each argument is easy to locate and edit.
sos run /home/ziningqi0913/xqtl-pipeline/pipeline/RNA_calling.ipynb rnaseqc_call \
    --cwd /home/ziningqi0913/output/rnaseq \
    --samples /home/ziningqi0913/fastq/xqtl_protocol_data.fastqlist \
    --data-dir /home/ziningqi0913/fastq \
    --gtf /home/ziningqi0913/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.gtf \
    --container /home/ziningqi0913/containers/rna_quantification.sif \
    --reference-fasta /home/ziningqi0913/reference_data/GRCh38_full_analysis_set_plus_decoy_hla.noALT_noHLA_noDecoy.fasta \
    --bam_list /home/ziningqi0913/output/rnaseq/xqtl_protocol_data_bam_list

### Call transcript-level RNA expression via RSEM

In [None]:
# Run the `rsem_call` workflow from RNA_calling.ipynb, passing the RSEM index
# directory and the same BAM list file used by the rnaseqc_call cell.
sos run /home/ziningqi0913/xqtl-pipeline/pipeline/RNA_calling.ipynb rsem_call \
    --cwd /home/ziningqi0913/output/rnaseq \
    --samples /home/ziningqi0913/fastq/xqtl_protocol_data.fastqlist \
    --data-dir /home/ziningqi0913/fastq/ \
    --RSEM-index /home/ziningqi0913/reference_data/RSEM_Index/ \
    --container /home/ziningqi0913/containers/rna_quantification.sif \
    --bam_list /home/ziningqi0913/output/rnaseq/xqtl_protocol_data_bam_list

### Multi-sample RNA-seq QC

In [None]:
# Run the `qc` workflow from bulk_expression_QC.ipynb on the rnaseqc TPM and
# read-count GCT files.
# Each continuation line must end with exactly one backslash: a doubled `\ \`
# makes the shell pass an escaped space to `sos run` as a stray extra argument.
sos run /home/ziningqi0913/xqtl-pipeline/pipeline/bulk_expression_QC.ipynb qc \
    --cwd /home/ziningqi0913/output/rnaseq \
    --tpm-gct /home/ziningqi0913/output/rnaseq/xqtl_protocol_data.rnaseqc.gene_tpm.gct.gz \
    --counts-gct /home/ziningqi0913/output/rnaseq/xqtl_protocol_data.rnaseqc.gene_readsCount.gct.gz \
    --container /home/ziningqi0913/containers/rna_quantification.sif

### Multi-sample read count normalization

In [None]:
# Run the `normalize` workflow from bulk_expression_normalization.ipynb on the
# low-expression-filtered, outlier-removed TPM and count GCT files, with a
# count threshold of 1 and the sample/participant lookup table.
sos run /home/ziningqi0913/xqtl-pipeline/pipeline/bulk_expression_normalization.ipynb normalize \
    --cwd /home/ziningqi0913/output/rnaseq \
    --tpm-gct /home/ziningqi0913/output/rnaseq/xqtl_protocol_data.rnaseqc.low_expression_filtered.outlier_removed.tpm.gct.gz \
    --counts-gct /home/ziningqi0913/output/rnaseq/xqtl_protocol_data.rnaseqc.low_expression_filtered.outlier_removed.geneCount.gct.gz \
    --annotation-gtf /home/ziningqi0913/reference_data/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf \
    --container /home/ziningqi0913/containers/rna_quantification.sif \
    --count-threshold 1 \
    --sample_participant_lookup /home/ziningqi0913/reference_data/sample_participant_lookup.rnaseq