In [1]:
from yet_another_bioinformatic_tool import FastQFilter, BiologicalSequence, NucleicAcidSequence, DNASequence, RNASequence, AminoAcidSequence

### 1. FASTQ file examples

In [2]:
# Build a filter for the example FASTQ file; only the input path is passed here.
handler = FastQFilter('example_data/example_fastq.fastq')
# Bare last expression: the notebook displays the object's repr.
# NOTE(review): the recorded repr prints `{self.path_to_output}`, `{self.gc_bounds}`, ...
# literally instead of their values, which suggests FastQFilter.__repr__ is missing an
# f-string prefix on those lines -- TODO confirm and fix in the library.
handler

FastQFilter(
	path_to_input=example_data/example_fastq.fastq,
	path_to_output={self.path_to_output}
	gc_bounds={self.gc_bounds},
	length_bounds={self.length_bounds},
	quality_threshold={self.quality_threshold}
)

In [3]:
# Run the filter with the handler configured above; the notebook records no output for this call.
# presumably the surviving reads are written to the handler's path_to_output -- TODO confirm
handler.filter()

In [4]:
# Same input file as before, this time with explicit GC and length bounds.
# presumably gc_bounds is a (min, max) GC percentage and length_bounds a (min, max)
# read length -- TODO confirm against the FastQFilter documentation.
handler = FastQFilter(
    'example_data/example_fastq.fastq',
    gc_bounds=(0, 60),
    length_bounds=(80, 300),
)
# Bare last expression: the notebook displays the object's repr.
handler

FastQFilter(
	path_to_input=example_data/example_fastq.fastq,
	path_to_output={self.path_to_output}
	gc_bounds={self.gc_bounds},
	length_bounds={self.length_bounds},
	quality_threshold={self.quality_threshold}
)

In [5]:
# Run the filter again, now on the handler built with custom bounds; no output is recorded.
handler.filter()

### 2. Sequences (DNA, RNA, proteins)

In [6]:
# Sequences constructed directly from strings.
dna = DNASequence('ATGC')
protein = AminoAcidSequence('NIKITAVAULIN')
# The RNA object is derived from the DNA object rather than typed by hand.
rna = dna.transcribe()

# One list holding all three objects, used by the loop over shared methods.
seqs = [dna, rna, protein]

In [7]:
# Bare expression: the notebook displays the sequence's repr.
dna

DNASequence('ATGC')

- #### common methods

In [9]:
# Members exercised on all three sequence objects: repr, length, alphabet, alphabet check.
# `alphabet` is printed as a set, so the order of its elements is arbitrary and can
# differ between interpreter runs.
for seq in seqs:
    # repr(seq) is the idiomatic spelling of seq.__repr__(); the printed text is the same.
    print(repr(seq), len(seq), seq.alphabet, seq.check_alphabet())

DNASequence('ATGC') 4 {'t', 'G', 'A', 'g', 'a', 'T', 'C', 'c'} True
RNASequence('AUGC') 4 {'G', 'A', 'u', 'g', 'a', 'U', 'C', 'c'} True
AminoAcidSequence('NIKITAVAULIN') 12 {'t', 'O', 'G', 'i', 'l', 'F', 'h', 'v', 'R', 'Y', 'T', 'K', 'L', 'n', 'k', 'r', 'E', 's', 'y', 'N', 'w', 'm', 'o', 'P', 'A', 'u', 'Q', 'S', 'I', 'q', 'H', 'a', 'U', 'W', 'C', 'f', 'c', 'V', 'e', 'D', 'g', 'p', 'd', 'M'} True


- #### nucleic acid methods

In [10]:
# complement() and gc_content are exercised on the DNA and RNA objects only.
# NOTE(review): the recorded output shows gc_content == 0.25 for 'ATGC' and 'AUGC';
# with the usual (G + C) / length definition this would be 0.5 -- TODO confirm the
# implementation of gc_content in the library.
for seq in (dna, rna):
    print(seq, seq.complement(), seq.gc_content)

ATGC TACG 0.25
AUGC UACG 0.25


- #### DNA method

In [11]:
# Transcribe the DNA object; the displayed result is an RNASequence.
dna.transcribe()

RNASequence('AUGC')

In [12]:
# Show the concrete class of the object that transcribe() returns.
type(dna.transcribe())

yet_another_bioinformatic_tool.RNASequence

- #### protein method

In [13]:
# Tally how often each residue letter occurs; the displayed result is a defaultdict(int).
protein.count_aa()

defaultdict(int,
            {'N': 2, 'I': 3, 'K': 1, 'T': 1, 'A': 2, 'V': 1, 'U': 1, 'L': 1})