# Shotgun Sequencing

### Generating a Random Genome

In [192]:
import random
import unittest

In [193]:
# Number of bases in the randomly generated genome.
genome_length = 100

# Inclusive bounds for the randomly drawn length of each read.
min_read_length = 2
max_read_length = 10

In [194]:
def generate_genome(genome_length):
    """Generate a random genome sequence of the requested length.

    Each base is picked independently with ``random.choice`` from the four
    nucleotides A, C, G and T, using the module-level ``random`` generator.

    Args:
        genome_length (int): Number of bases to generate. Zero or a
            negative value yields an empty string.

    Returns:
        str: The generated genome.
    """
    bases = ['A', 'C', 'G', 'T']

    # Join once instead of growing the string with ``+=`` for every base.
    # There is still exactly one ``random.choice`` call per base, so the
    # output for a given seed is the same as with the incremental loop.
    return ''.join(random.choice(bases) for _ in range(genome_length))

In [195]:
import random

def generate_reads(genome, min_read_length, max_read_length):
    """Simulate shotgun sequencing by cutting a genome into consecutive reads.

    The genome is split from left to right into non-overlapping fragments
    whose lengths are drawn with ``random.randint`` from the inclusive range
    ``[min_read_length, max_read_length]``. The last fragment is truncated
    to whatever remains of the genome, so it may be shorter than
    ``min_read_length``. Joining the returned reads in order reproduces the
    genome.

    Args:
        genome (str): The genome sequence to be fragmented.
        min_read_length (int): Smallest read length to draw (inclusive).
            Must be at least 1.
        max_read_length (int): Largest read length to draw (inclusive).
            Must not be smaller than ``min_read_length``.

    Returns:
        list of str: The resulting DNA reads, in genome order. Empty when
        the genome is empty.

    Raises:
        ValueError: If ``min_read_length`` is below 1 or greater than
            ``max_read_length``.
    """
    # A zero or negative fragment size would keep ``position`` from
    # advancing (or move it backwards), so the loop might never finish.
    if min_read_length < 1:
        raise ValueError("min_read_length must be at least 1")
    if min_read_length > max_read_length:
        raise ValueError("min_read_length must not exceed max_read_length")

    dna_reads = []
    genome_size = len(genome)
    position = 0

    while position < genome_size:
        # Clamp the drawn size so the final read stops at the genome's end.
        fragment_size = min(
            random.randint(min_read_length, max_read_length),
            genome_size - position,
        )
        dna_reads.append(genome[position:position + fragment_size])
        position += fragment_size

    return dna_reads

In [196]:
# Build a random genome and cut it into consecutive reads.
genome = generate_genome(genome_length)
dna_reads = generate_reads(genome, min_read_length, max_read_length)

# The reads are still in genome order here, so concatenating them must
# give back the original sequence.
reconstructed_genome = "".join(dna_reads)

if genome == reconstructed_genome:
    print(
        "Assembly completed with succes!",
        "Genome: {}".format(genome),
        "Reconstructed genome: {}".format(reconstructed_genome),
        sep="\n",
    )

Assembly completed with succes!
Genome: GAAAAGGTGACCCCGACTGCCTCTTCTTCTATCCACCAGCCGTCCCTAACCTGTGGCCACTCATGGTAACGCGGTAGGATGCTTTAATCTTAAGCATGTG
Reconstructed genome: GAAAAGGTGACCCCGACTGCCTCTTCTTCTATCCACCAGCCGTCCCTAACCTGTGGCCACTCATGGTAACGCGGTAGGATGCTTTAATCTTAAGCATGTG


In [197]:
# Show the reads as returned by generate_reads.
print(dna_reads)

['GA', 'AAAGGTGA', 'CCCCGACT', 'GCC', 'TCT', 'TCTTCT', 'ATCCACCAG', 'CCGTCCC', 'TAA', 'CC', 'TGTGGCC', 'ACTCA', 'TGGTAA', 'CGCGGTAG', 'GATGCTT', 'TAATCT', 'TAAGCA', 'TGTG']


In [198]:
def shuffle_reads(dna_reads):
    """Shuffle the reads into a random order.

    The list is shuffled in place with ``random.shuffle``, so the caller's
    list is modified; the same list object is then returned.

    Args:
        dna_reads (list of str): The reads to shuffle.

    Returns:
        list of str: The same list object, now in shuffled order.
    """
    random.shuffle(dna_reads)
    return dna_reads

In [199]:
# shuffle_reads reorders dna_reads in place and returns it, so the printed
# list is dna_reads itself in its new order.
print(shuffle_reads(dna_reads))

['CCCCGACT', 'TGTG', 'CC', 'TCT', 'GATGCTT', 'TAA', 'AAAGGTGA', 'CGCGGTAG', 'TAAGCA', 'ATCCACCAG', 'TGTGGCC', 'TCTTCT', 'GCC', 'ACTCA', 'GA', 'CCGTCCC', 'TAATCT', 'TGGTAA']
