In [1]:
import yaml
from pathlib import Path
import subprocess
from config import Configfile, CreateFolders
import pandas as pd
import os
import re


class ProgramCommando:
    ''' Base class for the external programs that gentools can run.

    Subclasses are expected to set ``self.program`` (the executable name);
    ``new_out_file`` reads it and this base class never assigns it.
    '''

    def __init__(self, config_file):
        self.config = Configfile(config_file)
        self.folders = CreateFolders(config_file)
        # Kept as text so it can be dropped straight into an argument list.
        self.threads = str(self.config['threads'])

    def new_out_file(self, read, new_dir, suffix=None):
        ''' Return the output path for ``read`` inside ``new_dir``.

        The file name is ``<stem>_<program><suffix>``. When ``suffix`` is
        falsy, the suffix of ``read`` itself is reused.
        '''
        if not suffix:
            suffix = read.suffix
        file_name = read.stem + '_' + self.program + suffix
        return new_dir / file_name

    def reads_in(self, folder, suffix=None):
        ''' Return the sorted entries of the directory registered as ``folder``.

        With a truthy ``suffix`` only entries whose suffix equals it are kept;
        otherwise only entries for which ``is_file()`` is true are kept.
        '''
        directory = self._reads_in_dict(folder)
        if suffix:
            selected = [entry for entry in directory.iterdir()
                        if entry.suffix == suffix]
        else:
            selected = [entry for entry in directory.iterdir()
                        if entry.is_file()]
        selected.sort()
        return selected

    def _reads_in_dict(self, folder):
        ''' Map a short folder key to the matching attribute of ``self.folders``.

        Raises ``KeyError`` when ``folder`` is not one of the known keys.
        '''
        # Every attribute is read on each call, whichever key is requested.
        known_folders = dict((
            ('raw', self.folders.raw_reads),
            ('umi_tools_extract', self.folders.umi_tools_processed),
            ('umi_tools_dedup', self.folders.umi_tools_processed_dedup),
            ('cutadapt', self.folders.cutadapt_processed),
            ('bowtie2_aligned', self.folders.bowtie2_processed_aligned),
            ('bowtie2_unaligned', self.folders.bowtie2_processed_unaligned),
            ('featureCounts', self.folders.feature_counts),
        ))
        return known_folders[folder]

    def extract_parameters(self):
        ''' Placeholder for subclasses; does nothing and returns None here. '''

    def create_command(self):
        ''' Placeholder for subclasses; does nothing and returns None here. '''

    def run_command(self):
        ''' Placeholder for subclasses; does nothing and returns None here. '''

# WORKS LIKE A CHARM! :D 
class CutadaptCommando(ProgramCommando):
    ''' Builds the cutadapt argument list from the config file. '''

    def __init__(self, config_file):
        super().__init__(config_file)
        self.program = 'cutadapt'

    def extract_parameters(self):
        ''' Return the base cutadapt command as a list of arguments.

        The list starts with the program name and ``-j <threads>``. Each
        key/value pair in the program's config section is then appended as
        ``-k value`` for one-character keys or ``--key value`` otherwise.
        The special key ``input`` is not added to the command; its value is
        resolved through ``reads_in`` and stored in ``self.input_files``.
        '''
        args = [self.program, '-j', self.threads]
        for option_group in self.config[self.program]:
            for name, setting in option_group.items():
                if name == 'input':
                    self.input_files = self.reads_in(setting)
                    continue
                # Single-letter options take one dash, long options two.
                dashes = '-' if len(name) == 1 else '--'
                args.extend((f'{dashes}{name}', str(setting)))
        return args
        

In [10]:
class FastpCommando(ProgramCommando):
    ''' Builds and runs fastp commands, one per input read file. '''

    def __init__(self, config_file):
        super().__init__(config_file)
        self.program = 'fastp'

    def extract_parameters(self):
        ''' Return the base fastp command as a list of arguments.

        The list starts with the program name and ``--thread <threads>``.
        Each key/value pair in the program's config section is then appended
        as ``-k value`` for one-character keys or ``--key value`` otherwise.
        The special key ``input`` is not added to the command; its value is
        resolved through ``reads_in`` and stored in ``self.input_files``.
        '''
        command = [self.program, '--thread', self.threads]
        for params in self.config[self.program]:
            for key, value in params.items():
                if key == 'input':
                    self.input_files = self.reads_in(value)
                    continue
                # Single-letter options take one dash, long options two.
                flag = f'-{key}' if len(key) == 1 else f'--{key}'
                command.extend((flag, str(value)))
        return command

    def create_command(self):
        ''' Return one complete fastp command (argument list) per input file.

        Each command is the base command from ``extract_parameters`` followed
        by ``-i <read>``, ``-o <output file>`` and ``--json <log file>``.
        The config section must contain an ``input`` entry, since that is
        the only place ``self.input_files`` gets assigned.
        '''
        # One call is enough: it yields the shared options and, as a side
        # effect, populates self.input_files.
        base_command = self.extract_parameters()
        list_of_commands = []
        for read in self.input_files:
            read_out = self.new_out_file(read, self.folders.fastp_processed)
            log_file = self.folders.fastp_log / f'{read.stem}.json'
            # Build a fresh list per read so the commands never share state.
            list_of_commands.append(
                [*base_command, '-i', read, '-o', read_out, '--json', log_file]
            )
        return list_of_commands

    def run_command(self):
        ''' Run every command from ``create_command`` in order.

        Each command is executed with ``subprocess.call``; exit codes are
        not inspected.
        '''
        # create_command is a method, so it has to be looked up on self.
        for command in self.create_command():
            subprocess.call(command)
    
    

[['fastp',
  '--thread',
  '6',
  '--adapter_sequence',
  'TGGAATTCTCGGGTGCCAAGG',
  '-i',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/test_data/mock.fasta'),
  '-o',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/testar_lite/fastp/processed/mock_fastp.fasta'),
  '--json',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/testar_lite/fastp/log/mock.json')],
 ['fastp',
  '--thread',
  '6',
  '--adapter_sequence',
  'TGGAATTCTCGGGTGCCAAGG',
  '-i',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/test_data/mock2.fasta'),
  '-o',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/testar_lite/fastp/processed/mock2_fastp.fasta'),
  '--json',
  PosixPath('/Users/williamrosenbaum/Bioinformatics/gentools/testar_lite/fastp/log/mock2.json')]]