In [None]:
from src.io_utils import load_fasta
from src.preprocess import clean_sequences
from src.analysis import motif_count, gc_content
from src.alignments import hamming_distance
from src.mutations import mutate_sequence

# Demo pipeline: load a FASTA file, clean the sequences, then run a few
# summary analyses. Every step that indexes into or averages over `cleaned`
# is guarded, so an empty result (e.g. every record removed by cleaning)
# produces a message instead of a ZeroDivisionError / IndexError.

# Step 1: Load sequences
sequences = load_fasta('data/raw/sample.fasta')
print(f"Loaded {len(sequences)} sequences")

# Step 2: Clean sequences
cleaned = clean_sequences(sequences)
print(f"{len(cleaned)} sequences after cleaning")

# Step 3: Basic Analysis
motif = 'ATG'
counts = motif_count(cleaned, motif)
print(f"Motif '{motif}' counts: {counts}")

gc = [gc_content(seq) for seq in cleaned]
if gc:
    print(f"Average GC content: {sum(gc)/len(gc):.3f}")
else:
    # Averaging an empty list would divide by zero.
    print("Average GC content: n/a (no sequences after cleaning)")

# Step 4: Compare two sequences
# NOTE(review): Hamming distance is normally defined only for equal-length
# sequences; confirm how hamming_distance handles a length mismatch.
if len(cleaned) >= 2:
    dist = hamming_distance(cleaned[0], cleaned[1])
    print(f"Hamming distance between first two sequences: {dist}")

# Step 5: Mutation example
# Needs at least one sequence; use the same len() style check as Step 4.
if len(cleaned) >= 1:
    print("Original:", cleaned[0])
    print("Mutated:", mutate_sequence(cleaned[0], mutation_rate=0.05))
else:
    print("Mutation example skipped: no sequences after cleaning")
