### Requirements
For x64 Linux or macOS:
```
conda create -n orthofinder orthofinder
```

For arm64 macOS using Rosetta:
```
conda create -n orthofinder
conda activate orthofinder
conda config --env --set subdir osx-64
conda install orthofinder
```

Then set the kernel of this Jupyter Notebook to the Python interpreter in the `orthofinder` Conda environment.

In [1]:
import os
import re
import requests
import subprocess

# Species whose peptide FASTA files are fetched from the Ensembl "current_fasta" FTP mirror.
species_list = ["Astatotilapia calliptera", "Maylandia zebra", "Oreochromis niloticus", "Danio rerio", "Amphilophus citrinellus", "Pundamilia nyererei", "Haplochromis burtoni", "Neolamprologus brichardi", "Oryzias latipes"]

# Raw downloads live in 'pep/original'; makedirs also creates the parent 'pep'.
# (Creating only 'pep' made the first write below fail with FileNotFoundError.)
os.makedirs('pep/original', exist_ok=True)
for species in species_list:
    # Ensembl directory names are lower-case with underscores.
    species = species.replace(' ', '_').lower()
    response = requests.get(f"https://ftp.ensembl.org/pub/current_fasta/{species}/pep/")
    # Scrape the directory listing for the "*.pep.all.fa.gz" archive name.
    genome = re.findall(r'href="([^"]+\.pep\.all\.fa\.gz)"', response.text)
    print(genome)
    if not genome:
        # Nothing matched for this species (or the listing was unavailable): skip it.
        continue

    genome_file = genome[0]
    genome_path = os.path.join('pep/original', genome_file)
    # The regex guarantees the '.gz' suffix, so strip exactly that suffix.
    unzip_path = genome_path[:-len('.gz')]

    if not os.path.isfile(genome_path) and not os.path.isfile(unzip_path):
        dna_url = f"https://ftp.ensembl.org/pub/current_fasta/{species}/pep/{genome_file}"
        response = requests.get(dna_url)
        # Fail loudly instead of saving an HTTP error page as the archive.
        response.raise_for_status()
        with open(genome_path, 'wb') as f:
            f.write(response.content)

    if not os.path.isfile(unzip_path):
        # check=True surfaces a corrupt/truncated archive instead of ignoring it.
        subprocess.run(['gunzip', '-k', genome_path], check=True)

    # The archive is only present right after a download; on re-runs it is
    # already gone, so removing it unconditionally raised FileNotFoundError.
    if os.path.isfile(genome_path):
        os.remove(genome_path)

['Astatotilapia_calliptera.fAstCal1.3.pep.all.fa.gz']
['Maylandia_zebra.M_zebra_UMD2a.pep.all.fa.gz']
['Oreochromis_niloticus.O_niloticus_UMD_NMBU.pep.all.fa.gz']
['Danio_rerio.GRCz11.pep.all.fa.gz']
['Amphilophus_citrinellus.Midas_v5.pep.all.fa.gz']
['Pundamilia_nyererei.PunNye1.0.pep.all.fa.gz']
['Haplochromis_burtoni.AstBur1.0.pep.all.fa.gz']
['Neolamprologus_brichardi.NeoBri1.0.pep.all.fa.gz']
['Oryzias_latipes.ASM223467v1.pep.all.fa.gz']


In [5]:
import os
import requests
import subprocess

# Fetch OrthoFinder's primary_transcript.py helper (once), run it on every
# decompressed FASTA in 'pep/original', then move its outputs up into 'pep'.
# NOTE(review): the script is stored inside 'pep', which is later passed to
# `orthofinder -f`; OrthoFinder reports non-FASTA files there in its log.
primary_transcript_script = 'pep/primary_transcript.py'
primary_transcripts_dir = 'pep/original/primary_transcripts'

if not os.path.isfile(primary_transcript_script):
    url = "https://raw.githubusercontent.com/davidemms/OrthoFinder/refs/heads/master/tools/primary_transcript.py"
    response = requests.get(url)
    # Without this, an HTTP error page would be cached as the script and,
    # because the file then exists, never downloaded again.
    response.raise_for_status()
    with open(primary_transcript_script, 'wb') as script_handle:
        script_handle.write(response.content)

# Only run the helper when its output directory does not exist yet.
if not os.path.exists(primary_transcripts_dir):
    fa_files = [name for name in os.listdir('pep/original') if name.endswith('.fa')]
    for fa_name in fa_files:
        # check=True stops the cell if the helper script fails on a file.
        subprocess.run(
            ['python', primary_transcript_script, os.path.join('pep/original', fa_name)],
            check=True,
        )

# The output directory is absent when there were no .fa inputs; listing it
# unconditionally raised FileNotFoundError in that case.
if os.path.isdir(primary_transcripts_dir):
    for transcript_name in os.listdir(primary_transcripts_dir):
        os.rename(
            os.path.join(primary_transcripts_dir, transcript_name),
            os.path.join('pep', transcript_name),
        )

In [8]:
# Collect the gene ids of zebrafish peptide records whose FASTA header
# contains 'ALT_' and write them, one per line, to a text file.
# NOTE(review): 'ALT_' presumably marks alternate-assembly sequences - confirm.
zebrafish_file = 'pep/original/Danio_rerio.GRCz11.pep.all.fa'
filtered_file = 'pep/Danio_rerio.alt_ids.txt'

with open(zebrafish_file, 'r') as infile, open(filtered_file, 'w') as outfile:
    for line in infile:
        # Only header lines flagged with 'ALT_' are of interest.
        if not line.startswith('>') or 'ALT_' not in line:
            continue
        # The gene id is the whitespace-delimited token right after 'gene:'.
        header_parts = line.split('gene:')
        if len(header_parts) < 2:
            continue  # header without a 'gene:' field (previously IndexError)
        id_tokens = header_parts[1].split()
        if not id_tokens:
            continue  # 'gene:' present but nothing follows it
        outfile.write(f'{id_tokens[0]}\n')

In [6]:
# Run OrthoFinder on the FASTA files in 'pep', skipping the run when a
# 'pep/OrthoFinder' results directory already exists.
if not os.path.exists('pep/OrthoFinder'):
    # check=True raises CalledProcessError on a non-zero exit status, so a
    # failed run is reported instead of being silently ignored.
    subprocess.run(["orthofinder", "-f", "pep"], check=True)


OrthoFinder version 2.5.5 Copyright (C) 2014 David Emms

2024-11-21 13:34:13 : Starting OrthoFinder 2.5.5
14 thread(s) for highly parallel tasks (BLAST searches etc.)
1 thread(s) for OrthoFinder algorithm

Checking required programs are installed
----------------------------------------
Test can run "mcl -h" - ok
Test can run "fastme -i /Users/home/Downloads/Project-Cichlids/pep/OrthoFinder/Results_Nov21/WorkingDirectory/dependencies/SimpleTest.phy -o /Users/home/Downloads/Project-Cichlids/pep/OrthoFinder/Results_Nov21/WorkingDirectory/dependencies/SimpleTest.tre" - ok

.DS_Store
primary_transcript.py
OrthoFinder expects FASTA files to have one of the following extensions: fasta, fa, faa, pep, fas

Dividing up work for BLAST for parallel processing
--------------------------------------------------
2024-11-21 13:34:15 : Creating diamond database 1 of 9
2024-11-21 13:34:15 : Creating diamond database 2 of 9
2024-11-21 13:34:15 : Creating diamond database 3 of 9
2024-11-21 13:34:16 : Cr