### metaSPAdes Apple Assembly

https://ablab.github.io/spades/

In [1]:
# Load the cluster's SPAdes environment module so that spades.py can be called in the cells below.
module load spades

#### Change working directory

In [2]:
# Move to the project root; every relative path used below (01c_fastp_reads/, 02_assembly/) is resolved from here.
cd /xdisk/kcooper/caparicio/tree-fruit/

#### Confirm directory change

In [3]:
# Print the current directory to verify that the cd above succeeded.
pwd

/xdisk/kcooper/caparicio/tree-fruit


#### To test-run the FIRST SAMPLE, run this code

In [None]:
# Assemble a single sample (apples313) in metagenomic mode as a trial run
# before processing the samples in bulk.
#   -1 / -2 : forward / reverse trimmed read files
#   -k      : k-mer sizes to use
#   -o      : output directory for this sample
spades.py --meta \
-1 01c_fastp_reads/apples313_1.trimmed.fastq \
-2 01c_fastp_reads/apples313_2.trimmed.fastq \
-k 21,33,55,77 \
-o 02_assembly/apples313

#### If the above ran normally, run the following to PROCESS ALL SAMPLES (the first sample's output directory is cleaned and re-assembled)

In [None]:
# Assemble every paired-end apple sample found in 01c_fastp_reads/ with metaSPAdes.
#
# For each *_1.trimmed.fastq file the matching *_2.trimmed.fastq mate is derived
# and the results are written to 02_assembly/<sample_name>.
#
# Default behaviour: any previous contents of a sample's output directory are
# deleted and the assembly starts from scratch.
# Opt-in resume: set SPADES_RESUME=1 before running this cell. When the output
# directory already holds SPAdes' own log (spades.log) it is left untouched and
# SPAdes is restarted from its last checkpoint instead.
resume_requested="${SPADES_RESUME:-0}"

# Succeeds when directory $1 exists and holds at least one entry (hidden ones included).
dir_has_entries() {
    [[ -d "$1" && -n "$(ls -A -- "$1" 2>/dev/null)" ]]
}

# Deletes everything inside directory $1 (hidden entries too) but keeps the directory itself.
# The :? guard aborts the expansion if $1 is empty, so this can never expand to "/*".
empty_dir() {
    rm -rf -- "${1:?}"/{,.[!.],..?}*
}

files1=(01c_fastp_reads/*apples*_1.trimmed.fastq)

for f1 in "${files1[@]}"; do
    # An unmatched glob is kept as the literal pattern; never hand that to SPAdes.
    if [[ ! -f "$f1" ]]; then
        echo "No forward read file found for $f1; skipping." >&2
        continue
    fi

    f2="${f1%_1.trimmed.fastq}_2.trimmed.fastq"       # Path to the reverse read file
    sample_name=$(basename "$f1" "_1.trimmed.fastq")  # Sample name without the read suffix
    output_dir="02_assembly/${sample_name}"           # Per-sample output directory
    temp_dir="${output_dir}/tmp"                      # SPAdes scratch space inside the output directory

    if [[ ! -f "$f2" ]]; then
        echo "Reverse read file $f2 not found for $sample_name; skipping." >&2
        continue
    fi

    # The command is kept as an array so paths are passed to SPAdes verbatim
    # (no eval, no word splitting, no glob expansion).
    if [[ "$resume_requested" == "1" && -f "${output_dir}/spades.log" ]]; then
        echo "Attempting to resume SPAdes assembly in $output_dir"
        cmd=(spades.py -o "$output_dir" --restart-from last)
    else
        # Ensure a clean start by removing any data left over from a previous run
        if dir_has_entries "$output_dir"; then
            echo "Output directory $output_dir is not empty. Cleaning up..."
            empty_dir "$output_dir"
            # Verify clean-up success
            if dir_has_entries "$output_dir"; then
                echo "Failed to clean the directory, attempting again..."
                empty_dir "$output_dir"
                # Final verification
                if dir_has_entries "$output_dir"; then
                    echo "Critical error: Output directory $output_dir cannot be cleaned." >&2
                    exit 1 # Stop the whole batch rather than assemble on top of stale data
                fi
            fi
        else
            echo "Creating output directory $output_dir"
            mkdir -p "$output_dir"
        fi

        # Create the scratch directory only after the output directory is known to be clean
        mkdir -p "$temp_dir"

        echo "Starting new SPAdes assembly in $output_dir"
        cmd=(spades.py --meta -k 21,33,55,77 -o "$output_dir" --tmp-dir "$temp_dir" -1 "$f1" -2 "$f2")
    fi

    echo "Running command: ${cmd[*]}"
    "${cmd[@]}"
    spades_status=$?

    # Success requires both a zero exit status and the contigs file; checking the
    # file alone could be fooled by a stale contigs.fasta when resuming.
    if [[ "$spades_status" -eq 0 && -f "${output_dir}/contigs.fasta" ]]; then
        echo "Assembly completed successfully for $sample_name. Cleaning up temporary files."
        rm -rf -- "$temp_dir"
    else
        echo "Assembly incomplete or failed for $sample_name. Temporary files retained for troubleshooting."
    fi
done

Output directory 02_assembly/apples313 is not empty. Cleaning up...
Starting new SPAdes assembly in 02_assembly/apples313
Running command: spades.py --meta -k 21,33,55,77 -o 02_assembly/apples313 --tmp-dir 02_assembly/apples313/tmp -1 01c_fastp_reads/apples313_1.trimmed.fastq -2 01c_fastp_reads/apples313_2.trimmed.fastq




Command line: /opt/ohpc/pub/apps/spades/3.15.5/bin/spades.py	--meta	-k	21,33,55,77	-o	/xdisk/kcooper/caparicio/tree-fruit/02_assembly/apples313	--tmp-dir	/xdisk/kcooper/caparicio/tree-fruit/02_assembly/apples313/tmp	-1	/xdisk/kcooper/caparicio/tree-fruit/01c_fastp_reads/apples313_1.trimmed.fastq	-2	/xdisk/kcooper/caparicio/tree-fruit/01c_fastp_reads/apples313_2.trimmed.fastq	

System information:
  SPAdes version: 3.15.5
  Python version: 2.7.5
  OS: Linux-3.10.0-1160.108.1.el7.x86_64-x86_64-with-centos-7.9.2009-Core

Output dir: /xdisk/kcooper/caparicio/tree-fruit/02_assembly/apples313
Mode: read error correction and assembling
Debug mode is turned OFF

Dataset par