In [1]:
from yet_another_bioinformatic_tool import FastQFilter, BiologicalSequence, NucleicAcidSequence, DNASequence, RNASequence, AminoAcidSequence

### 1. FASTQ file examples

In [2]:
# Build a filter from the input path alone; the repr displayed below shows the
# values the remaining settings take when they are not passed explicitly.
# NOTE(review): path_to_output carries a numeric suffix ("(7)") in the recorded
# output — presumably auto-numbered so earlier results are not overwritten,
# which would make this output differ from run to run. Confirm.
# NOTE(review): the displayed repr has no comma after the path_to_output line;
# this looks like a formatting slip in FastQFilter's repr — confirm upstream.
handler = FastQFilter('example_data/example_fastq.fastq')
handler

FastQFilter(
	path_to_input=example_data/example_fastq.fastq,
	path_to_output=example_data/example_fastq (7).fastq
	gc_bounds=(0, 100),
	length_bounds=(0, 4294967296),
	quality_threshold=0
)

In [3]:
# Run the filter with the settings currently held by `handler`.
# The call displays nothing; results presumably go to path_to_output — confirm.
handler.filter()

In [4]:
# Same input file, now with explicit GC and length bounds; quality_threshold is
# not passed and shows up as 0 in the repr below.
# gc_bounds is presumably expressed in percent (the unrestricted range shown
# earlier is (0, 100)) — TODO confirm.
# Rebinds `handler`: the previously created filter object is replaced.
handler = FastQFilter('example_data/example_fastq.fastq', gc_bounds=(0, 60), length_bounds=(80, 300))
handler

FastQFilter(
	path_to_input=example_data/example_fastq.fastq,
	path_to_output=example_data/example_fastq (8).fastq
	gc_bounds=(0, 60),
	length_bounds=(80, 300),
	quality_threshold=0
)

In [5]:
# Run the filter again, this time with the bounded settings held by `handler`.
# As before the call displays nothing; output location presumably
# path_to_output — confirm.
handler.filter()

### 2. Sequences (DNA, RNA, proteins)

In [6]:
# Example objects reused by every cell in this section.
dna = DNASequence('ATGC')
# RNA obtained from the DNA example rather than typed by hand.
rna = dna.transcribe()
protein = AminoAcidSequence('NIKITAVAULIN')

# Order matters: later cells slice off the last element to get the nucleic
# acids only, so the protein must stay last.
seqs = [dna, rna, protein]

In [7]:
# Bare expression: the notebook displays the object's repr.
dna

DNASequence('ATGC')

- #### common methods

In [8]:
# Members available on all three example sequences: repr, length, the set of
# allowed symbols, and the alphabet validity check.
# `alphabet` prints as a set, so the order of symbols in the output can differ
# between kernel sessions.
for seq in seqs:
    # repr() is the supported way to obtain the repr string; calling the
    # dunder method directly is unidiomatic.
    print(repr(seq), len(seq), seq.alphabet, seq.check_alphabet())

DNASequence('ATGC') 4 {'t', 'c', 'C', 'A', 'T', 'g', 'G', 'a'} True
RNASequence('AUGC') 4 {'U', 'c', 'u', 'C', 'A', 'g', 'G', 'a'} True
AminoAcidSequence('NIKITAVAULIN') 12 {'U', 'R', 'Q', 'd', 'E', 'l', 'i', 'M', 'K', 'L', 'k', 'Y', 'H', 'f', 'o', 'h', 'c', 's', 'w', 'A', 'S', 'g', 'G', 'N', 'a', 'r', 't', 'W', 'u', 'm', 'T', 'q', 'F', 'v', 'O', 'p', 'y', 'V', 'C', 'e', 'P', 'D', 'n', 'I'} True


- #### nucleic methods

In [9]:
# Members specific to nucleic acids, shown for the DNA and RNA examples only
# (the protein example has no complement / GC content demo here).
# NOTE(review): the recorded output reports gc_content 0.25 for 'ATGC', although
# G and C make up 2 of its 4 bases — confirm how gc_content is defined.
for seq in (dna, rna):
    print(seq, seq.complement(), seq.gc_content)

ATGC TACG 0.25
AUGC UACG 0.25


- #### DNA method

In [10]:
# Transcription of the DNA example; the displayed result is an RNASequence
# with U where the DNA had T.
dna.transcribe()

RNASequence('AUGC')

In [11]:
# Show the concrete class of the object returned by transcribe().
type(dna.transcribe())

yet_another_bioinformatic_tool.RNASequence

- #### protein method

In [12]:
# Per-residue tally for the protein example; per the output below the result
# is a dict mapping each residue letter to its number of occurrences.
protein.count_aa()

{'N': 2, 'I': 3, 'K': 1, 'T': 1, 'A': 2, 'V': 1, 'U': 1, 'L': 1}