Correlation analysis

In [523]:
import random
import numpy as np
import math

In [558]:
n = 3 #num x
m = 4 #num theta
N = 5 #num time steps
reaction_coefficients_file = 'reaction_coefficients.txt'
#ks maps a coefficient name (e.g. 'k1') to its float value, one "name=expression" per line of the file
ks = {}
with open(reaction_coefficients_file, 'r') as o:
    for l in o:
        parts = l.split('=')
        #blank lines (e.g. a trailing newline at the end of the file) have no '=' and
        #would otherwise raise IndexError on parts[1]
        if len(parts) < 2:
            continue
        #strip the name so "k1 = 0.5" is stored under 'k1' (the key sbar looks up), not 'k1 '
        k = parts[0].strip()
        #NOTE(review): eval executes whatever expression the file contains; only use a trusted
        #coefficients file, or switch to float()/ast.literal_eval if plain literals are enough
        ks[k] = float(eval(parts[1].strip()))

In [559]:
#Random integers in [1, 5] (presumably placeholders for real solver output - TODO confirm).
#solveddxdtheta is indexed [theta j][time step k][species i]; x is indexed [species i][time step k].
solveddxdtheta = [
    [
        [random.randint(1,5) for _species in range(n)]
        for _step in range(N)
    ]
    for _theta in range(m)
]
x = [
    [random.randint(1,5) for _step in range(N)]
    for _species in range(n)
]

In [560]:
def sbar(i, j, k):
    """Return solveddxdtheta[j][k][i] * ks['k<j+1>'] / x[i][k].

    i: species index, j: zero-based theta index, k: time-step index.
    Reads the notebook-level globals ``solveddxdtheta``, ``ks`` and ``x``;
    the coefficient for theta ``j`` is stored under the one-based key 'k<j+1>'.
    """
    sensitivity = solveddxdtheta[j][k][i]
    coefficient = ks['k{}'.format(j+1)]
    state = x[i][k]
    return sensitivity * coefficient / state

In [561]:
#S[j][k][i] = sbar(i, j, k): one nested list per theta j, holding one row per
#time step k with one entry per species i
S = [
    [[sbar(i, j, k) for i in range(n)] for k in range(N)]
    for j in range(m)
]
    
#for theta_mat in S:
#    print(theta_mat)            

In [562]:
#Display the full nested list S, indexed [theta j][time step k][species i]
S

[[[2.0, 1.0, 0.5],
  [0.5, 0.2, 0.125],
  [0.3, 0.4, 2.5],
  [1.25, 0.3, 0.3333333333333333],
  [0.3, 0.375, 0.375]],
 [[0.0015, 0.00025, 0.0004],
  [0.0006666666666666666, 0.0005, 0.00025],
  [0.0002, 0.0004, 0.0015],
  [0.00025, 0.0004, 0.0005],
  [0.0005, 0.000375, 0.0005]],
 [[0.5, 0.5, 0.3],
  [0.5, 0.3, 0.5],
  [0.2, 0.5, 1.0],
  [0.75, 0.1, 0.5],
  [0.3, 0.25, 0.125]],
 [[0.002, 0.00025, 0.00030000000000000003],
  [0.0005, 0.0002, 0.000625],
  [0.0002, 0.0001, 0.001],
  [0.00125, 0.0001, 0.00016666666666666666],
  [0.0005, 0.000625, 0.000625]]]

In [563]:
#Collapse every theta's matrix in S to one number per time step by adding up
#all the sbar values in that time step's row.
#theta_summ[j][k] is the row sum for theta j at time step k.
theta_summ = [
    [sum(timestep) for timestep in theta_mat]
    for theta_mat in S
]

theta_summ

[[3.5, 0.825, 3.2, 1.8833333333333333, 1.05],
 [0.00215, 0.0014166666666666666, 0.0021000000000000003, 0.00115, 0.001375],
 [1.3, 1.3, 1.7, 1.35, 0.675],
 [0.00255, 0.001325, 0.0013, 0.0015166666666666668, 0.0017500000000000003]]

In [566]:
#Sanity check: correlating a series with itself must give a matrix of ones.
#The series is passed straight to corrcoef instead of being bound to the notebook-level
#names `x` and `y`: rebinding `x` would replace the state matrix that sbar() indexes as
#x[i][k], so re-running the cell that builds S afterwards would raise a TypeError.
np.corrcoef(theta_summ[0],theta_summ[0])

array([[1., 1.],
       [1., 1.]])

In [573]:
#correlation matrix: Mc[a][b] is the correlation coefficient between the per-time-step
#sums of theta a and theta b (the off-diagonal entry of the 2x2 corrcoef result)
Mc = [
    [np.corrcoef(theta_summ[a],theta_summ[b])[0][1] for b in range(m)]
    for a in range(m)
]
for row in Mc:
    print(row)

[1.0, 0.8494852931132884, 0.601108877944958, 0.4842883469482159]
[0.8494852931132882, 1.0, 0.4815421014873943, 0.43663118582591326]
[0.601108877944958, 0.4815421014873943, 1.0, -0.2660634181980259]
[0.484288346948216, 0.43663118582591326, -0.2660634181980259, 1.0]


In [547]:
#Correlation between two single rows of S: theta index 1 at time step 1
#versus theta index 2 at time step 0 (each row holds one value per species)
np.corrcoef(S[1][1],S[2][0])

array([[1., 1.],
       [1., 1.]])

Identifiability Analysis

In [476]:
#Scaled Euclidean norm of every matrix stored in a 3d array.
#param A: 3d array (sequence of 2d matrices indexed [k][i])
#param N: number of rows read from each matrix (num time steps)
#param n: number of entries read from each row (num rxn species)
#returns: list with one value per matrix, (1/N) * sqrt(sum of |entry|^2)
def magnitude(A,N,n):
    def squared_total(matrix):
        #accumulate in the same k-then-i order so floating-point rounding is unchanged
        total = 0
        for step in range(N):
            for species in range(n):
                #abs() only matters for complex entries; for real values squaring alone gives the same result
                total += abs(matrix[step][species])**2
        return total

    return [(1/N) * np.sqrt(squared_total(matrix)) for matrix in A]

In [477]:
#1.
#Score every parameter's matrix in S with the scaled Euclidean norm from magnitude()
S_magnitudes = magnitude(S,N,n)
#The parameter with the largest magnitude is taken as the first identifiable parameter;
#its matrix becomes XL (L=1)
largest_magnitude = max(S_magnitudes)
XL_idx = S_magnitudes.index(largest_magnitude)
XL = np.array(S[XL_idx])
#ident_order collects parameter names ('k1', 'k2', ...) from most to least identifiable
ident_order = ['k{}'.format(XL_idx+1)]
XL

array([[0.25      , 0.33333333],
       [0.33333333, 1.25      ],
       [0.33333333, 0.25      ],
       [0.125     , 1.5       ],
       [0.375     , 0.625     ]])

![ResidualMat](./residual_mat_formula.png)

In [478]:
#2.
#formulate the residual matrix RL = (I - XL (XL^T XL)^-1 XL^T) S
I = np.identity(N) #identity matrix
XLT = np.transpose(XL)
RL = np.linalg.inv(np.matmul(XLT,XL))
RL = np.matmul(np.matmul(XL, RL), XLT)
RL = np.subtract(I,RL)
#S holds one matrix per parameter, so matmul applies the (N x N) factor to each of them
RL = np.matmul(RL,S)
XLplusone = [XL.tolist()]
#3.
#Calculate the magnitude of each column of RL.
RL_magnitudes = magnitude(RL,N,n)
print(RL_magnitudes)
#remaining_idx[p] is the index into S/RL of the parameter whose magnitude currently sits at
#RL_magnitudes[p]. RL_magnitudes shrinks every iteration, so a position in it stops matching
#the parameter index after the first deletion; without this lookup later iterations would
#label and append the wrong parameter.
remaining_idx = list(range(len(RL_magnitudes)))
#Stops with one entry left: the parameter already chosen as XL has a residual of ~0, so it is
#expected to be the last one remaining. NOTE(review): this relies on floating-point residuals of
#the other parameters all being larger than that rounding noise.
while len(RL_magnitudes)>1:
    for col in XLplusone:
        print(col)
    print('---------')
    #The column with the largest magnitude corresponds to the next identifiable parameter because it has the largest
    #effect on the response variables of all the remaining parameters, which is not correlated with the effects
    #of the selected set of parameters.
    pos = RL_magnitudes.index(max(RL_magnitudes))
    #translate the position in the shrinking list back to the original parameter index
    nextID_idx = remaining_idx.pop(pos)
    nextID = np.array(RL[nextID_idx])
    #Augment XL with the column of S corresponding to this parameter. The augmented matrix is denoted as XL+1
    XLplusone.append(S[nextID_idx])
    #remove max of RL_magnitudes so next iteration we get the next biggest
    RL_magnitudes = np.delete(RL_magnitudes,pos).tolist()
    #add to list of params ordered in identifiability
    ident_order.append('k{}'.format(nextID_idx+1))
    #NOTE(review): the described procedure repeats steps 2-4 with the augmented matrix (or stops once
    #the maximum magnitude drops below a pre-specified threshold). This loop does neither: RL is
    #computed once from the initial XL and the remaining parameters are ranked by that single residual.

[1.1505179923744555e-16, 0.00017358600953634512, 0.11795322210454906]
[[0.25, 0.3333333333333333], [0.3333333333333333, 1.25], [0.3333333333333333, 0.25], [0.125, 1.5], [0.375, 0.625]]
---------
[[0.25, 0.3333333333333333], [0.3333333333333333, 1.25], [0.3333333333333333, 0.25], [0.125, 1.5], [0.375, 0.625]]
[[0.5, 0.5], [0.8333333333333334, 0.25], [0.8333333333333334, 0.75], [0.125, 0.5], [0.5, 0.625]]
---------


In [479]:
#Display the parameter names ('k<index>') in the order they were appended to ident_order
ident_order

['k1', 'k3', 'k2']