# Transdecoder to HMMER TIR domain code

This code unzips Symbiodiniaceae or coral genomes downloaded from NCBI so that they end up in ".fna" format. Next, the genomes are translated using TransDecoder and then finally run through HMMER to search for TIR domains. The TIR domain file was downloaded from the Pfam online database.

In [1]:
#Unzip the symbiodinium or coral genomes
#!/bash
import os
genome_dir = "../input/genome_raw"
for genome in os.listdir(genome_dir):
    if not genome.endswith(".gz"):
        continue
    unzip_file_path = os.path.join(genome_dir,genome)
    !gzip -d {unzip_file_path}


# Run transdecoder on genome files downloaded from NCBI
Note that the output files are generated in the location of the code script, which in this case is the procedure
folder.

In [2]:
#run transdecoder
#!/bash
import os
genome_dir = "../input/genome_raw"
for genome in os.listdir(genome_dir):
    if not genome.endswith(".fna"):
        continue
    transdecoder_file_path = os.path.join(genome_dir,genome)
    
    print(f"Finding the longest open reading frame for {transdecoder_file_path}") 
    
    !TransDecoder.LongOrfs -t {transdecoder_file_path} > /dev/null 2>&1

Finding the longest open reading frame for ../input/genome_raw/A_aurita_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/G_fascicularis_final_1.0.fna
^C
Finding the longest open reading frame for ../input/genome_raw/P_verrucosa_v1.0_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/Aiptasia_genome_1.1_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/Fungia_final_1.0.fna
^C
Finding the longest open reading frame for ../input/genome_raw/A_hyactintus_1.0_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/P_damicornis_v1_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/A_cytherea_1.0_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/O_faveolata_v1_genomic.fna
^C
Finding the longest open reading frame for ../input/genome_raw/p_lutea_final_2.1.fna
^C
Finding the longest open reading frame for ../input/genome_raw/S_pistillata

# Move the files generated from transdecoder 
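Move all TransDecoder files from the procedure folder to output/transdecoder_part1.
Move the TransDecoder "longest_orfs.pep" file (found inside each genome's "transdecoder_dir" folder) to input/genomes/longest_orfs/transdecoder_part1, giving each copy a genome-specific name (for example "A_cytherea_longest_orfs.pep"), since every genome's file is otherwise called "longest_orfs.pep".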

In [None]:
#example moving script
!cp ../procedure/genome_raw/GCA_000507305.1_ASM50730v1_genomic.fna.transdecoder_dir/longest_orfs.pep ../input/genomes/B_minutum.fasta

# Run HMMER

Build an HMM profile using the TIR domain from Pfam

In [3]:
#!/bash
import os
pfam_domain_dir = "../input/domains"
output_dir = "../output"
for domain in os.listdir(pfam_domain_dir):
    if not domain.endswith(".sto"):
        continue
    pfam_domain_filepath = os.path.join(pfam_domain_dir, domain)
    
    print("Building a HMM profile from: {pfam_domain_filepath}")
    #need to split the file name
    base_filename = os.path.splitext(domain)[0]
    
    #build the hmmer profile for each domain in the folder and save the file
    output_filepath = os.path.join(output_dir, f"{base_filename}_profile.hmm")
    
    !echo "Building HMM profile: {output_filepath}"
    
    !hmmbuild {output_filepath} {pfam_domain_filepath} > /dev/null 2>&1

Building a HMM profile from: {pfam_domain_filepath}
Building HMM profile: ../output/PF01582_full_up_profile.hmm


Search the transdecoder longest_orfs file for each genome against the TIR domain.

In [4]:
import os
#search transdecoder file for TIR domains and save them
genome_dir = "../input/genomes/longest_orfs/transdecoder_part1"
hmm_dir = "../output/"
output_dir ="../output"
for hmm_file in os.listdir(hmm_dir):
    if not hmm_file.endswith(".hmm"):
            continue
    hmm_filepath = os.path.join(hmm_dir,hmm_file)
 
    for genome_file in os.listdir(genome_dir):
        if not genome_file.endswith(".pep"):
            continue
        cnidarian_pep_filepath=os.path.join(genome_dir,genome_file)
        
        print(f"Running HMM search on genome {cnidarian_pep_filepath}")
        print(f"Running HMM search using HMM file {hmm_filepath}")
       
        cnidarian_pep_split = os.path.splitext(genome_file)[0]
        
        output_file = f"{cnidarian_pep_split}_hmmer_results.txt"
        output_filepath = os.path.join(output_dir, output_file)
        print(f"Generating output file path {output_filepath}")
        
        !hmmsearch --tblout {output_filepath} {hmm_filepath} {cnidarian_pep_filepath} > /dev/null 2>&1


Running HMM search on genome ../input/genomes/longest_orfs/transdecoder_part1/A_cytherea_longest_orfs.pep
Running HMM search using HMM file ../output/PF01582_full_up_profile.hmm
Generating output file path ../output/A_cytherea_longest_orfs_hmmer_results.txt
Running HMM search on genome ../input/genomes/longest_orfs/transdecoder_part1/Aiptasia_longest_orfs.pep
Running HMM search using HMM file ../output/PF01582_full_up_profile.hmm
Generating output file path ../output/Aiptasia_longest_orfs_hmmer_results.txt
Running HMM search on genome ../input/genomes/longest_orfs/transdecoder_part1/A_aurita_longest_orfs.pep
Running HMM search using HMM file ../output/PF01582_full_up_profile.hmm
Generating output file path ../output/A_aurita_longest_orfs_hmmer_results.txt
Running HMM search on genome ../input/genomes/longest_orfs/transdecoder_part1/P_lutea_longest_orfs.pep
Running HMM search using HMM file ../output/PF01582_full_up_profile.hmm
Generating output file path ../output/P_lutea_longest_orfs_