# Oqani et al. 2025: STARsolo

In [None]:
#!/bin/bash
#
# This script aligns the Rinaldo/Maltepe FASTQs to the mm10 STAR index (see path_to_star_index below)
# Performs alignments for all libraries in the current directory
# Uses 10X Chromium V3 settings & custom whitelist
#
# Any line that starts with #$ is an instruction to SGE
#$ -S /bin/bash                    #-- the shell for the job
#$ -o star.log                     #-- output log
#$ -cwd                            #-- tell the job that it should start in your working directory
#$ -r y                            #-- tell the system that if a job crashes, it should be restarted
#$ -j y                            #-- tell the system that the STDERR and STDOUT should be joined
#$ -l mem_free=64G                 #-- submits on nodes with enough free memory (required)
#$ -l scratch=300G                 #-- SGE resources (home and scratch disks)
#$ -l h_rt=48:00:00                #-- runtime limit (hr:min:sec)
#

# USER INPUTS:
# Reference data: the STAR genome index directory and the cell-barcode whitelist
path_to_star_index="/wynton/home/wagner/dwagner/references/star/230731_mm10/index"
path_to_10Xv3_whitelist="/wynton/home/wagner/dwagner/references/whitelists/10Xv3_3M-february-2018.txt"
# Parent directory that holds one sub-directory of FASTQ files per sample
# (written with its trailing slash)
base_path="/wynton/group/wagner/NIMBUS/FASTQ/230705_NovaSeq_PR/"
# Thread count handed to STAR via --runThreadN
nThreads=32

# CODE:
# Open the log: clear the screen, then record the start time and the host
clear
date
hostname

# Load the required modules in order: CBI first, then star/2.7.10b
for required_module in CBI star/2.7.10b; do
    module load "${required_module}"
done

# Align every sample directory under ${base_path} with STARsolo, then tidy up
# and archive each sample's outputs. Each sample directory contains multiple
# sets of FASTQ files, which are passed to STAR as comma-separated lists.
#
# Reads:   base_path, path_to_star_index, path_to_10Xv3_whitelist, nThreads
# Outputs: STAR files inside each sample directory, plus
#          ${base_path}/<sample>.tar.gz (FASTQ files excluded)
# Exits:   non-zero if ${base_path} is unreachable or any sample failed
cd "${base_path}" || { echo "ERROR: cannot cd to ${base_path}" >&2; exit 1; }

n_failed=0

# The trailing "/" restricts the glob to directories, so the <sample>.tar.gz
# archives written into ${base_path} further down are not mistaken for
# samples when the job is run again.
for sample_dir in maltepee*/; do

    # an unmatched glob stays literal; skip it instead of treating it as a sample
    [[ -d "${sample_dir}" ]] || continue
    sample="${sample_dir%/}"
    # works whether or not base_path was written with a trailing slash
    out_dir="${base_path%/}/${sample}"

    # the FASTQ globs below are relative, so STAR must never run from the
    # wrong directory
    if ! cd "${out_dir}"; then
        echo "ERROR: cannot cd to ${out_dir}; skipping sample ${sample}" >&2
        n_failed=$((n_failed + 1))
        continue
    fi
    echo "Processing libraries for sample: ${sample}"

    # Removal of earlier STAR outputs is disabled; say so instead of claiming
    # a cleanup that does not happen. Uncomment the rm to re-enable it.
    echo "Not removing previous Star outputs (cleanup is disabled)"
    #rm -r Solo.out _STAR* Log.* SJ* Aligned*

    # format list of 10X R2-R1 FASTQ file pairs for this sample
    R1_list=(*R1*)
    R2_list=(*R2*)
    # without nullglob an unmatched pattern is kept verbatim, so test that the
    # first entry of each list is a real file
    if [[ ! -e "${R1_list[0]}" || ! -e "${R2_list[0]}" ]]; then
        echo "ERROR: no R1/R2 FASTQ files found for sample ${sample}; skipping" >&2
        n_failed=$((n_failed + 1))
        continue
    fi
    # the two comma-separated lists are paired by position
    if (( ${#R1_list[@]} != ${#R2_list[@]} )); then
        echo "ERROR: sample ${sample} has ${#R1_list[@]} R1 but ${#R2_list[@]} R2 files; skipping" >&2
        n_failed=$((n_failed + 1))
        continue
    fi
    printf -v csv_r2 '%s,' "${R2_list[@]}"
    printf -v csv_r1 '%s,' "${R1_list[@]}"

    # --readFilesIn receives two separate arguments (R2 list first, then the
    # R1 list); each has its trailing comma stripped
    STAR \
        --genomeDir "${path_to_star_index}" \
        --readFilesIn "${csv_r2%,}" "${csv_r1%,}" \
        --soloCBwhitelist "${path_to_10Xv3_whitelist}" \
        --runThreadN "${nThreads}" \
        --outFileNamePrefix "${out_dir}/" \
        --twopassMode Basic \
        --runDirPerm All_RWX \
        --readFilesCommand zcat \
        --soloMultiMappers Uniform \
        --soloFeatures Gene GeneFull Velocyto \
        --soloType CB_UMI_Simple \
        --soloCBlen 16 \
        --soloUMIstart 17 \
        --soloUMIlen 12 \
        --soloBarcodeReadLength 0 \
        --soloCBmatchWLtype 1MM \
        --soloCellFilter EmptyDrops_CR
    star_status=$?

    # tidy up the star output
    # remove the massive sam file even after a failed run, so a partial file
    # does not linger on disk
    rm -f -- "${out_dir}/Aligned.out.sam"

    # do not compress or archive the results of a failed alignment
    if (( star_status != 0 )); then
        echo "ERROR: STAR exited with status ${star_status} for sample ${sample}; skipping tidy-up" >&2
        n_failed=$((n_failed + 1))
        continue
    fi

    # compress the rest; the existence test skips patterns that matched nothing
    for solo_file in "${out_dir}"/Solo.out/*/*/*.mtx "${out_dir}"/Solo.out/*/*/*.tsv; do
        [[ -e "${solo_file}" ]] || continue
        gzip -f -- "${solo_file}"
    done

    # archive the sample directory next to it, leaving the FASTQ files out
    if ! tar --exclude="*fastq*" -zcf "${base_path%/}/${sample}.tar.gz" "${out_dir}/"; then
        echo "ERROR: could not create archive for sample ${sample}" >&2
        n_failed=$((n_failed + 1))
    fi

done

# report failures through the job's exit status
if (( n_failed > 0 )); then
    echo "ERROR: ${n_failed} step(s) failed; see messages above" >&2
    exit 1
fi