In [None]:
import random

import numpy as np

from model_evaluation import single_decoding_routine

In [None]:
# SIMULATION PARAMETERS
# Seed both global generators up front so that "Restart Kernel -> Run All"
# reproduces the same random emission parameters below.
random_seed = 0
random.seed(random_seed)
np.random.seed(random_seed)

tree_path = "tree.json"
number_of_nucleotids = 100
alphabet = ['A', 'C', 'T', 'G']
alphabetSize = len(alphabet)

nbState = 4
# transition matrix of the toy gene finder; every row sums to 1:
#   state 0 -> 1 and state 1 -> 2 with probability 1,
#   states 2 and 3 move to state 3 with probability 0.33, else back to 0.
A = np.zeros((nbState, nbState))
A[0, 1] = 1
A[1, 2] = 1
A[2, 3] = 0.33
A[2, 0] = 1 - A[2, 3]
A[3, 3] = 0.33  # 0.9999  # unrealistic ...
A[3, 0] = 1 - A[3, 3]

# state initial probability (one entry per state, sums to 1)
b = np.array([0.25, 0.25, 0.26, 0.24])

animalNames = ["dog", "cat", "pig", "cow", "rat", "mouse", "baboon",
               "human"]
n_species = len(animalNames)
# Quoted motivation for the per-state parameters below:
# "[...], such as the higher average rate of substitution and the greater
# transition/transversion ratio, in noncoding and third-codon-position sites
# than in first- and second-codon-position sites [...]"

pi = np.zeros((nbState, alphabetSize))
# raw draws for pi 0 and 1 are between 0 and 0.001
pi[0] = np.random.rand(alphabetSize) * 0.001
pi[1] = np.random.rand(alphabetSize) * 0.001
# but between 0 and 0.01 for pi 2 and 3
pi[2] = np.random.rand(alphabetSize) * 0.01
pi[3] = np.random.rand(alphabetSize) * 0.01
# Normalise each row into a probability distribution over the alphabet.
# NOTE(review): the per-row scale factors above cancel out in this division,
# so after normalisation the 0.001 / 0.01 distinction has no effect -- confirm
# this is intended.
pi /= pi.sum(axis=1)[:, None]

# transition/transversion rate (presumably one value per state -- TODO confirm)
kappa = np.array([2.3, 2.7, 4.3, 5.4])

In [None]:
# MONTE-CARLO PARAMETERS
# Number of independent decoding runs whose metrics are averaged.
n_simulations = 50
# Sequence length setting for the Monte-Carlo study.
# NOTE(review): the evaluation loop in this notebook passes
# `number_of_nucleotids`, not this value -- confirm which one is intended.
n_nucleotids = 10_000

In [None]:
# Precision, sensitivity and specificity
# Run the decoding routine `n_simulations` times; each call yields one
# (precision, sensitivity, specificity) triple that is stored per run.
precision = np.zeros(n_simulations)
sensitivity = np.zeros(n_simulations)
specificity = np.zeros(n_simulations)
for i in range(n_simulations):
    # NOTE(review): this uses `number_of_nucleotids` from the simulation
    # parameters; the Monte-Carlo cell also defines `n_nucleotids`, which is
    # never used here -- confirm which length is intended.
    precision[i], sensitivity[i], specificity[i] = single_decoding_routine(
        tree_path, number_of_nucleotids, alphabet, A, b, n_species, pi, kappa)

# Average each metric over all runs. The "sensitivity" key previously carried
# a stray leading space, which made summary["sensitivity"] raise KeyError.
summary = {
    "precision": np.mean(precision),
    "sensitivity": np.mean(sensitivity),
    "specificity": np.mean(specificity),
}


In [None]:
# Display the dictionary of metric means computed over all simulations.
summary