# dash-cam Simulator

This Python notebook extracts the results of the dash-cam simulator.

In [None]:
import glob
import numpy as np
import os
import random
from simulator import *
from reads import Reads

#### Define the data directory

In [None]:
# Directory searched for the reference genomes (*.fna) and handed to the
# Reads getter.
datadir = "data/"
# k-mer size handed to the Reads getter.
kmer_size = 32
# Create the data directory up front. Reuse `datadir` so the path is defined
# in exactly one place.
os.makedirs(datadir, exist_ok=True)

#### Define the parallel simulator of dash-cam, and add references

In [None]:
# Create the parallel simulator and register one reference block per genome
# file (*.fna) found in the data directory.
parallel_search = ParallelSearch()
# glob returns matches in filesystem order; sort so blocks are always
# registered in the same order from run to run.
for genome_filepath in sorted(glob.glob(os.path.join(datadir, "*.fna"))):
    # Block name is the file name up to its first dot,
    # e.g. "data/foo.v1.fna" -> "foo".
    block_name = os.path.basename(genome_filepath).split(".")[0]
    parallel_search.buildBlock(
        block_name=block_name,
        genome_file=genome_filepath,
    )

#### Define read getter and list all available sequencer platforms

In [None]:
# Read getter over the configured data directory and k-mer size. Reuse
# `datadir` rather than repeating the "data/" literal.
reads = Reads(datadir=datadir, kmer_size=kmer_size)
# Platforms reported by the read getter; the result cells below use these.
platforms = reads.getPlatforms()

#### Generate results for the different platforms as a function of the dash-cam Hamming-distance tolerance threshold

Note: Running this cell might take a while.

In [None]:
# Sweep the threshold (0..16) for every platform and every genome, recording
# one CSV file per platform.

# The CSV paths below live under results/; make sure the directory exists.
os.makedirs("results", exist_ok=True)
# The genome list does not change during the sweep, so list it once.
genome_files = sorted(glob.glob(os.path.join(datadir, "*.fna")))

for platform in platforms:
    parallel_search.recordResults(f"results/{platform}-threshold-varies.csv")
    try:
        for threshold in range(17):
            for genome_file in genome_files:
                parallel_search.search(
                    patterns=reads.getReads(
                        platform=platform,
                        # glob results already include `datadir`; prefixing
                        # it again would yield "data/data/...".
                        genome_filepath=genome_file,
                    ),
                    threshold=threshold,
                    # Same naming rule as the block names: file name up to
                    # its first dot.
                    true_genome=os.path.basename(genome_file).split(".")[0],
                )
    finally:
        # Always stop recording, even if a search fails part-way through.
        parallel_search.stopRecording()

#### Generate results for the accuracy as a function of elapsed time

In [None]:
#### Generate time results
# Sweep the `time` search parameter over 50 evenly spaced values in [0, 150]
# for every genome, recording everything into results/time.csv.

# Select the platform explicitly. Previously this cell relied on the
# `platform` loop variable leaked by the threshold cell (i.e. the last entry
# of `platforms`) and failed with NameError if that cell had not been run.
platform = platforms[-1]

# The CSV path below lives under results/; make sure the directory exists.
os.makedirs("results", exist_ok=True)
# The genome list does not change during the sweep, so list it once.
genome_files = sorted(glob.glob(os.path.join(datadir, "*.fna")))

parallel_search.recordResults("results/time.csv")
try:
    for time_point in np.linspace(0, 150, 50):
        for genome_file in genome_files:
            parallel_search.search(
                patterns=reads.getReads(
                    platform=platform,
                    # glob results already include `datadir`; prefixing it
                    # again would yield "data/data/...".
                    genome_filepath=genome_file,
                ),
                time=time_point,
                # Same naming rule as the block names: file name up to its
                # first dot.
                true_genome=os.path.basename(genome_file).split(".")[0],
            )
finally:
    # Always stop recording, even if a search fails part-way through.
    parallel_search.stopRecording()

#### Generate results for the accuracy as a function of the varying discharge rate

In [None]:
# Sweep the `discharge_rate` search parameter over 50 evenly spaced values in
# [0, 1] for every genome, recording everything into
# results/discharge-rate.csv.

# Select the platform explicitly. Previously this cell relied on the
# `platform` loop variable leaked by the threshold cell (i.e. the last entry
# of `platforms`) and failed with NameError if that cell had not been run.
platform = platforms[-1]

# The CSV path below lives under results/; make sure the directory exists.
os.makedirs("results", exist_ok=True)
# The genome list does not change during the sweep, so list it once.
genome_files = sorted(glob.glob(os.path.join(datadir, "*.fna")))

parallel_search.recordResults("results/discharge-rate.csv")
try:
    for discharge_rate in np.linspace(0, 1, 50):
        for genome_file in genome_files:
            parallel_search.search(
                patterns=reads.getReads(
                    platform=platform,
                    # glob results already include `datadir`; prefixing it
                    # again would yield "data/data/...".
                    genome_filepath=genome_file,
                ),
                discharge_rate=discharge_rate,
                # Same naming rule as the block names: file name up to its
                # first dot.
                true_genome=os.path.basename(genome_file).split(".")[0],
            )
finally:
    # Always stop recording, even if a search fails part-way through.
    parallel_search.stopRecording()