# HMMER TIR domain search code

This code will run the translated genomes genomes from transdecoder and run them though HMMER searching for TIR associated domains. TIR domain files were downloaded from the Pfam online database.

# Run HMMER

Build a HMM profile using the TIR domain from Pfam

In [2]:
#!/bash
import os
pfam_domain_dir = "../input/domains"
output_dir = "../output/domains"
for domain in os.listdir(pfam_domain_dir):
    if not domain.endswith(".sto"):
        continue
    pfam_domain_filepath = os.path.join(pfam_domain_dir, domain)
    
    print(f"Building a HMM profile from: {pfam_domain_filepath}")
    #need to split the file name
    base_filename = os.path.splitext(domain)[0]
    
    #build the hmmer profile for each domain in the folder and save the file
    output_filepath = os.path.join(output_dir, f"{base_filename}_profile.hmm")
    
    !echo "Building HMM profile: {output_filepath}"
    
    !hmmbuild {output_filepath} {pfam_domain_filepath} > /dev/null 2>&1

Building a HMM profile from: ../input/domains/PF13519_full.sto
Building HMM profile: ../output/domains/PF13519_full_profile.hmm
Building a HMM profile from: ../input/domains/PF13927_full.sto
Building HMM profile: ../output/domains/PF13927_full_profile.hmm
Building a HMM profile from: ../input/domains/PF18452_full.sto
Building HMM profile: ../output/domains/PF18452_full_profile.hmm
Building a HMM profile from: ../input/domains/PF13855_full.sto
Building HMM profile: ../output/domains/PF13855_full_profile.hmm
Building a HMM profile from: ../input/domains/PF00560_full.sto
Building HMM profile: ../output/domains/PF00560_full_profile.hmm
Building a HMM profile from: ../input/domains/PF13516_full.sto
Building HMM profile: ../output/domains/PF13516_full_profile.hmm
Building a HMM profile from: ../input/domains/PF07725_full.sto
Building HMM profile: ../output/domains/PF07725_full_profile.hmm
Building a HMM profile from: ../input/domains/PF18837_full.sto
Building HMM profile: ../output/domains/P

Search the transdecoder longest_orfs file for each genome against the TIR domain.

In [2]:
import os
#search transdecoder file for TIR domains and save them
genome_dir_cnid = "../input/cnidaria/longest_orfs/transdecoder_part1"
genome_dir_symbio = "../input/symbiodinium/longest_orfs/transdecoder_part1"
hmm_dir = "../output/domains"
output_dir_cnid = "../output/cnidaria/hmmer_txt_output"
output_dir_symbio = "../output/symbiodinium/hmmer_txt_output"

for hmm_file in os.listdir(hmm_dir):
    if not hmm_file.endswith(".hmm"):
            continue
    hmm_filepath = os.path.join(hmm_dir,hmm_file)
 
    #run HMMER on cnidarian transdecoder files
    for genome_file in os.listdir(genome_dir_cnid):
        if not genome_file.endswith(".pep"):
            continue
        cnidarian_pep_filepath = os.path.join(genome_dir_cnid,genome_file)
        
        print(f"Running HMM search on genome {cnidarian_pep_filepath}")
        print(f"Running HMM search using HMM file {hmm_filepath}")
       
        cnidarian_pep_split = os.path.splitext(genome_file)[0]
        hmm_split = os.path.splitext(hmm_file)[0]
        
        output_file = f"{cnidarian_pep_split}_{hmm_split}_hmmer_results.txt"
        output_filepath = os.path.join(output_dir_cnid, output_file)
        print(f"Generating output file path {output_filepath}")
        
        !hmmsearch --tblout {output_filepath} {hmm_filepath} {cnidarian_pep_filepath} > /dev/null 2>&1
    
    #run HMMER on Symbiodiniaceae
    for genome_file in os.listdir(genome_dir_symbio):
        if not genome_file.endswith(".pep"):
            continue
        symbiodinium_pep_filepath = os.path.join(genome_dir_symbio,genome_file)
        
        print(f"Running HMM search on genome {symbiodinium_pep_filepath}")
        print(f"Running HMM search using HMM file {hmm_filepath}")
       
        symbiodinium_pep_split = os.path.splitext(genome_file)[0]
        hmm_split = os.path.splitext(hmm_file)[0]
        
        output_file = f"{symbiodinium_pep_split}_{hmm_split}_hmmer_results.txt"
        output_filepath = os.path.join(output_dir_symbio, output_file)
        print(f"Generating output file path {output_filepath}")
        
        !hmmsearch --tblout {output_filepath} {hmm_filepath} {symbiodinium_pep_filepath} > /dev/null 2>&1


Running HMM search on genome ../input/cnidaria/longest_orfs/transdecoder_part1/A_cytherea_longest_orfs.pep
Running HMM search using HMM file ../output/domains/PF18452_full_profile.hmm
Generating output file path ../output/cnidaria/hmmer_txt_output/A_cytherea_longest_orfs_PF18452_full_profile_hmmer_results.txt
Running HMM search on genome ../input/cnidaria/longest_orfs/transdecoder_part1/Aiptasia_longest_orfs.pep
Running HMM search using HMM file ../output/domains/PF18452_full_profile.hmm
Generating output file path ../output/cnidaria/hmmer_txt_output/Aiptasia_longest_orfs_PF18452_full_profile_hmmer_results.txt
Running HMM search on genome ../input/cnidaria/longest_orfs/transdecoder_part1/A_aurita_longest_orfs.pep
Running HMM search using HMM file ../output/domains/PF18452_full_profile.hmm
Generating output file path ../output/cnidaria/hmmer_txt_output/A_aurita_longest_orfs_PF18452_full_profile_hmmer_results.txt
Running HMM search on genome ../input/cnidaria/longest_orfs/transdecoder_pa